Major updates: - December 2025 crisis documentation and separation agreement - Daily check system v2 with multiple card categories - Xiaozhu rental search tools and results - Exit plan documentation - Message drafts for family communication - Confluent moved to CONSTANT - Updated profiles and promises 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
207 lines
6.9 KiB
JavaScript
207 lines
6.9 KiB
JavaScript
const puppeteer = require('puppeteer');
|
|
const fs = require('fs');
|
|
|
|
/**
 * Xiaozhu Inspector - inspect page structure without login.
 *
 * Launches headless Chromium through Puppeteer and tries a list of candidate
 * Xiaozhu URLs in order. For the first URL that can be both loaded and
 * analysed it writes a full-page screenshot and a JSON dump of the page
 * structure to the current directory and prints a summary to the console.
 * The output is meant to help identify the correct CSS selectors.
 *
 * The browser is closed before the returned promise settles, including when
 * creating the page or analysing a URL throws.
 *
 * @returns {Promise<void>} Resolves once one URL has been inspected or every
 *   candidate URL has failed. Rejects if the page cannot be created or the
 *   browser cannot be launched/closed.
 */
async function inspectXiaozhu() {
  console.log('🔍 Launching browser to inspect Xiaozhu...');

  const browser = await puppeteer.launch({
    headless: "new", // Run in headless mode for WSL compatibility
    defaultViewport: { width: 1920, height: 1080 },
    args: ['--no-sandbox', '--disable-setuid-sandbox'], // Required for WSL
  });

  try {
    const page = await browser.newPage();

    // Try different URL patterns
    const searchUrls = [
      'https://www.xiaozhu.com/search-shanghai-徐汇区/',
      'https://www.xiaozhu.com/search/shanghai/',
      'https://www.xiaozhu.com/shanghai/',
      'https://www.xiaozhu.com/',
    ];

    for (const url of searchUrls) {
      console.log(`\n📡 Trying: ${url}`);

      // Navigation and analysis are guarded separately so the log says which
      // step failed; either failure moves on to the next candidate URL.
      try {
        await page.goto(url, {
          waitUntil: 'networkidle2',
          timeout: 15000,
        });
      } catch (err) {
        console.log(`❌ Failed to load: ${err.message}`);
        continue; // Try next URL
      }

      try {
        await inspectLoadedPage(page);
      } catch (err) {
        console.log(`❌ Failed to inspect: ${err.message}`);
        continue; // Try next URL
      }

      // Found a working URL, no need to try others
      return;
    }

    console.log('\n❌ All URLs failed. Site might be blocking automated access.');
  } finally {
    // Runs on every exit path (success, all-failed, thrown error) so the
    // Chromium process is never left behind.
    await browser.close();
  }
}

/**
 * Screenshot, analyse and report on the page that is currently loaded.
 *
 * @param {object} page - Puppeteer page that has already navigated.
 * @returns {Promise<void>}
 */
async function inspectLoadedPage(page) {
  console.log(`✅ Loaded: ${page.url()}`);

  // Wait a bit for dynamic content
  await new Promise((resolve) => setTimeout(resolve, 3000));

  // One timestamp for both artefacts so the screenshot and JSON file pair up.
  const stamp = Date.now();

  const screenshotPath = `./xiaozhu_screenshot_${stamp}.png`;
  await page.screenshot({ path: screenshotPath, fullPage: true });
  console.log(`📸 Screenshot saved: ${screenshotPath}`);

  // collectPageInfo is serialised and executed inside the browser page.
  const pageInfo = await page.evaluate(collectPageInfo);

  const infoPath = `./xiaozhu_pageinfo_${stamp}.json`;
  fs.writeFileSync(infoPath, JSON.stringify(pageInfo, null, 2));
  console.log(`💾 Page info saved: ${infoPath}`);

  printPageSummary(pageInfo);

  console.log('\n✅ Successfully inspected this URL!');
  console.log('📸 Check the screenshot and JSON file for details');
}

/**
 * Collect structural information about the current document.
 *
 * Runs in the browser context via `page.evaluate`, so it must be fully
 * self-contained: it may only use DOM globals and names declared inside its
 * own body, and everything it returns must be JSON-serialisable.
 *
 * @returns {object} Title, URL, body classes, every class name and id found,
 *   candidate container selectors, up to three sampled listing cards, and
 *   any errors hit while probing selectors.
 */
function collectPageInfo() {
  // The currency sign is optional, so this matches any run of digits that is
  // followed by 元 or a slash.
  const pricePattern = /[\u00A5\uFFE5]?\d+[元\/]/;
  const headingTags = ['H1', 'H2', 'H3', 'H4'];

  // Read the class attribute instead of `el.className`: on SVG elements
  // `className` is an SVGAnimatedString, not a string, so string methods on
  // it throw.
  const classAttr = (el) => el.getAttribute('class') ?? '';

  const selectorErrors = [];
  const recordError = (selector, err) => {
    selectorErrors.push({ selector, message: String(err?.message ?? err) });
  };

  // Collect all classes and IDs
  const allClasses = new Set();
  const allIds = new Set();
  document.querySelectorAll('*').forEach((el) => {
    for (const cls of classAttr(el).split(/\s+/)) {
      if (cls) allClasses.add(cls);
    }
    if (el.id) allIds.add(el.id);
  });

  // Look for elements that might contain listings
  const possibleSelectors = [
    '.list', '.listing', '.result', '.item', '.card',
    '[class*="list"]', '[class*="result"]', '[class*="house"]',
    '[class*="room"]', '[class*="apartment"]',
  ];

  const possibleContainers = [];
  for (const selector of possibleSelectors) {
    try {
      const elements = document.querySelectorAll(selector);
      if (elements.length > 0 && elements.length < 100) {
        possibleContainers.push({
          selector,
          count: elements.length,
          sample: classAttr(elements[0]) || elements[0].id || 'no class/id',
        });
      }
    } catch (err) {
      recordError(selector, err);
    }
  }

  // Build the sample record for one candidate listing card.
  const describeCard = (card, selector) => {
    const listing = {
      selector,
      html: card.innerHTML.substring(0, 500),
      text: card.textContent.substring(0, 200).trim(),
      classes: classAttr(card),
      priceElements: [],
      titleElements: [],
      locationElements: [], // Never populated; kept so the JSON shape is unchanged
    };

    card.querySelectorAll('*').forEach((el) => {
      const text = el.textContent;
      const className = classAttr(el);

      // Look for price (¥, 元, number)
      if (pricePattern.test(text)) {
        listing.priceElements.push({
          tag: el.tagName,
          class: className,
          text: text.substring(0, 50),
        });
      }

      // Title usually in a heading, or has 'title' in class
      if (headingTags.includes(el.tagName) || className.includes('title')) {
        listing.titleElements.push({
          tag: el.tagName,
          class: className,
          text: text.substring(0, 100),
        });
      }
    });

    return listing;
  };

  // Try to extract any visible listings using common card patterns
  const cardSelectors = [
    '.pho_item', '.room_box', '.result_list li', '.house_item',
    '[class*="card"]', '[class*="item"]',
  ];

  const extractedListings = [];
  for (const sel of cardSelectors) {
    try {
      const cards = document.querySelectorAll(sel);
      // Only a selector matching 3 to 49 elements is treated as a card list.
      if (cards.length <= 2 || cards.length >= 50) continue;

      // Sample first 3
      for (const card of Array.from(cards).slice(0, 3)) {
        extractedListings.push(describeCard(card, sel));
      }
      break; // Found good selector, stop
    } catch (err) {
      recordError(sel, err);
    }
  }

  return {
    title: document.title,
    url: window.location.href,
    bodyClasses: document.body.className,
    possibleContainers,
    possibleListingCards: [], // Never populated; kept so the JSON shape is unchanged
    allClasses: Array.from(allClasses),
    allIds: Array.from(allIds),
    extractedListings,
    selectorErrors,
  };
}

/**
 * Print a human-readable summary of a page-info object to the console.
 *
 * @param {object} pageInfo - Result of `collectPageInfo`.
 * @returns {void}
 */
function printPageSummary(pageInfo) {
  console.log('\n📊 PAGE ANALYSIS:');
  console.log(` Title: ${pageInfo.title}`);
  console.log(` URL: ${pageInfo.url}`);
  console.log(` Total classes found: ${pageInfo.allClasses.length}`);
  console.log(` Total IDs found: ${pageInfo.allIds.length}`);

  if (pageInfo.possibleContainers.length > 0) {
    console.log('\n🎯 POSSIBLE LISTING CONTAINERS:');
    for (const c of pageInfo.possibleContainers.slice(0, 5)) {
      console.log(` - ${c.selector} (${c.count} elements)`);
    }
  }

  if (pageInfo.extractedListings.length > 0) {
    console.log('\n📝 SAMPLE LISTINGS EXTRACTED:');
    pageInfo.extractedListings.forEach((l, i) => {
      console.log(`\n Listing ${i + 1} (selector: ${l.selector}):`);
      if (l.titleElements.length > 0) {
        console.log(` Title: ${l.titleElements[0].text}`);
      }
      if (l.priceElements.length > 0) {
        console.log(` Price: ${l.priceElements[0].text}`);
      }
    });
  }
}
|
|
|
|
// Script entry point: run the inspection. On failure, log the error and mark
// the process as failed so shells and CI see a non-zero exit status instead
// of a silent success.
inspectXiaozhu().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|