personal-hub/tools/xiaozhu_inspector.js
StillHammer 3c8162c990 Sync couple_matters: December crisis, separation agreement, daily check v2, xiaozhu search
Major updates:
- December 2025 crisis documentation and separation agreement
- Daily check system v2 with multiple card categories
- Xiaozhu rental search tools and results
- Exit plan documentation
- Message drafts for family communication
- Confluent moved to CONSTANT
- Updated profiles and promises

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-23 07:04:02 +08:00

207 lines
6.9 KiB
JavaScript

const puppeteer = require('puppeteer');
const fs = require('fs');
/**
 * URL patterns tried in order until one of them can be inspected.
 * Several shapes are listed because the site's search URL scheme is not known up front.
 */
const DEFAULT_SEARCH_URLS = [
  'https://www.xiaozhu.com/search-shanghai-徐汇区/',
  'https://www.xiaozhu.com/search/shanghai/',
  'https://www.xiaozhu.com/shanghai/',
  'https://www.xiaozhu.com/',
];

/**
 * Collect structural information about the current document.
 *
 * This function is handed to `page.evaluate`, so it executes inside the page and
 * must stay self-contained: it may only use browser globals (`document`, `window`)
 * and its own locals, never anything from the Node.js module scope.
 *
 * @returns {object} JSON-serializable summary with the keys `title`, `url`,
 *   `bodyClasses`, `possibleContainers`, `possibleListingCards`, `allClasses`,
 *   `allIds`, `extractedListings` and `selectorErrors`.
 */
function collectPageInfo() {
  // SVG elements expose `className` as an SVGAnimatedString instead of a string,
  // so calling string methods on it throws. Route every class lookup through here.
  const classNameOf = (el) => {
    if (typeof el.className === 'string') return el.className;
    return el.className?.baseVal ?? '';
  };

  // Failures are recorded rather than swallowed so a broken heuristic is visible
  // in the saved JSON instead of silently producing an empty result.
  const selectorErrors = [];
  const recordError = (selector, err) => {
    selectorErrors.push({ selector, message: String(err?.message ?? err) });
  };

  // Every class name and id present anywhere in the document.
  const classes = new Set();
  const ids = new Set();
  for (const el of document.querySelectorAll('*')) {
    for (const cls of classNameOf(el).split(/\s+/)) {
      if (cls) classes.add(cls);
    }
    if (el.id) ids.add(el.id);
  }

  // Selectors that might match a listing container.
  const containerSelectors = [
    '.list', '.listing', '.result', '.item', '.card',
    '[class*="list"]', '[class*="result"]', '[class*="house"]',
    '[class*="room"]', '[class*="apartment"]',
  ];
  const possibleContainers = [];
  for (const selector of containerSelectors) {
    try {
      const elements = document.querySelectorAll(selector);
      // Skip selectors that match nothing or match 100+ elements.
      if (elements.length > 0 && elements.length < 100) {
        const first = elements[0];
        possibleContainers.push({
          selector,
          count: elements.length,
          sample: classNameOf(first) || first.id || 'no class/id',
        });
      }
    } catch (err) {
      recordError(selector, err);
    }
  }

  const PRICE_PATTERN = /[¥¥]?\d+[元\/]/;
  const HEADING_TAGS = ['H1', 'H2', 'H3', 'H4'];

  // Summarize one candidate card: a truncated dump plus price/title candidates.
  const describeCard = (card, selector) => {
    const listing = {
      selector,
      html: card.innerHTML.substring(0, 500),
      text: card.textContent.substring(0, 200).trim(),
      classes: classNameOf(card),
      priceElements: [],
      titleElements: [],
      locationElements: [],
    };
    // `textContent` includes descendant text, so ancestors of the element that
    // actually holds the price match the pattern as well.
    for (const el of card.querySelectorAll('*')) {
      const text = el.textContent ?? '';
      const cls = classNameOf(el);
      if (PRICE_PATTERN.test(text)) {
        listing.priceElements.push({
          tag: el.tagName,
          class: cls,
          text: text.substring(0, 50),
        });
      }
      // Title candidates: h1-h4 headings, or anything with 'title' in its class.
      if (HEADING_TAGS.includes(el.tagName) || cls.includes('title')) {
        listing.titleElements.push({
          tag: el.tagName,
          class: cls,
          text: text.substring(0, 100),
        });
      }
    }
    return listing;
  };

  // Selectors that might match individual listing cards, tried in order.
  const cardSelectors = [
    '.pho_item', '.room_box', '.result_list li', '.house_item',
    '[class*="card"]', '[class*="item"]',
  ];
  let extractedListings = [];
  for (const selector of cardSelectors) {
    try {
      const cards = document.querySelectorAll(selector);
      if (cards.length > 2 && cards.length < 50) {
        // Sample the first three cards, then stop at this first plausible selector.
        extractedListings = Array.from(cards)
          .slice(0, 3)
          .map((card) => describeCard(card, selector));
        break;
      }
    } catch (err) {
      recordError(selector, err);
    }
  }

  return {
    title: document.title,
    url: window.location.href,
    bodyClasses: document.body.className,
    possibleContainers,
    possibleListingCards: [],
    // Sets are converted to arrays so the result survives serialization.
    allClasses: Array.from(classes),
    allIds: Array.from(ids),
    extractedListings,
    selectorErrors,
  };
}

/**
 * Print a short human-readable digest of a page-info object to the console.
 *
 * @param {object} pageInfo - Result of `collectPageInfo`.
 */
function printPageSummary(pageInfo) {
  console.log('\n📊 PAGE ANALYSIS:');
  console.log(` Title: ${pageInfo.title}`);
  console.log(` URL: ${pageInfo.url}`);
  console.log(` Total classes found: ${pageInfo.allClasses.length}`);
  console.log(` Total IDs found: ${pageInfo.allIds.length}`);

  if (pageInfo.possibleContainers.length > 0) {
    console.log('\n🎯 POSSIBLE LISTING CONTAINERS:');
    for (const c of pageInfo.possibleContainers.slice(0, 5)) {
      console.log(` - ${c.selector} (${c.count} elements)`);
    }
  }

  if (pageInfo.extractedListings.length > 0) {
    console.log('\n📝 SAMPLE LISTINGS EXTRACTED:');
    pageInfo.extractedListings.forEach((l, i) => {
      console.log(`\n Listing ${i + 1} (selector: ${l.selector}):`);
      if (l.titleElements.length > 0) {
        console.log(` Title: ${l.titleElements[0].text}`);
      }
      if (l.priceElements.length > 0) {
        console.log(` Price: ${l.priceElements[0].text}`);
      }
    });
  }
}

/**
 * Xiaozhu Inspector - inspect page structure without login.
 *
 * Launches a headless browser and tries each candidate URL in turn. For the first
 * URL that can be processed it saves a full-page screenshot and a JSON dump of the
 * page structure into the current directory, prints a summary, and stops. The
 * output is meant to help identify the correct CSS selectors.
 *
 * @param {object} [options]
 * @param {string[]} [options.urls] - Candidate URLs, tried in order.
 * @param {number} [options.settleDelayMs=3000] - Pause after navigation, in
 *   milliseconds, to let dynamic content render.
 * @returns {Promise<void>} Resolves once a URL was inspected or all of them failed.
 *   Rejects if the browser cannot be launched, a page cannot be opened, or the
 *   browser cannot be closed.
 */
async function inspectXiaozhu({
  urls = DEFAULT_SEARCH_URLS,
  settleDelayMs = 3000,
} = {}) {
  console.log('🔍 Launching browser to inspect Xiaozhu...');
  const browser = await puppeteer.launch({
    headless: "new", // Run in headless mode for WSL compatibility
    defaultViewport: { width: 1920, height: 1080 },
    args: ['--no-sandbox', '--disable-setuid-sandbox'], // Required for WSL
  });

  // try/finally guarantees the browser process is shut down on every exit path,
  // including an unexpected failure in newPage().
  try {
    const page = await browser.newPage();

    for (const url of urls) {
      console.log(`\n📡 Trying: ${url}`);
      try {
        await page.goto(url, {
          waitUntil: 'networkidle2',
          timeout: 15000,
        });
        console.log(`✅ Loaded: ${page.url()}`);

        // Wait a bit for dynamic content
        await new Promise((resolve) => setTimeout(resolve, settleDelayMs));

        // One timestamp for both artifacts so the screenshot and JSON can be paired.
        const stamp = Date.now();

        const screenshotPath = `./xiaozhu_screenshot_${stamp}.png`;
        await page.screenshot({ path: screenshotPath, fullPage: true });
        console.log(`📸 Screenshot saved: ${screenshotPath}`);

        const pageInfo = await page.evaluate(collectPageInfo);

        const infoPath = `./xiaozhu_pageinfo_${stamp}.json`;
        fs.writeFileSync(infoPath, JSON.stringify(pageInfo, null, 2));
        console.log(`💾 Page info saved: ${infoPath}`);

        printPageSummary(pageInfo);

        console.log('\n✅ Successfully inspected this URL!');
        console.log('📸 Check the screenshot and JSON file for details');
        // Found a working URL, no need to try others
        return;
      } catch (err) {
        // Any failure while processing this URL (navigation, screenshot,
        // evaluation, file write) lands here; move on to the next candidate.
        console.log(`❌ Failed to load: ${err.message}`);
      }
    }

    console.log('\n❌ All URLs failed. Site might be blocking automated access.');
  } finally {
    await browser.close();
  }
}
// Script entry point: run the inspection. On an unhandled failure, print the error
// and set a non-zero exit code so shells and CI can tell the run did not succeed.
inspectXiaozhu().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});