couple_matters/tools/xiaozhu_url_finder.js
StillHammer 92c2a9f022 Sync couple_matters: December crisis, separation agreement, daily check v2, xiaozhu search
Major updates:
- December 2025 crisis documentation and separation agreement
- Daily check system v2 with multiple card categories
- Xiaozhu rental search tools and results
- Exit plan documentation
- Message drafts for family communication
- Confluent moved to CONSTANT
- Updated profiles and promises

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-23 06:54:08 +08:00

83 lines
2.5 KiB
JavaScript

const puppeteer = require('puppeteer');
/**
 * Xiaozhu URL Finder - Figure out the correct URL structure
 *
 * Launches a headless browser, visits each candidate Xiaozhu URL in turn,
 * logs what was found there (final URL, title, HTTP status, 404 verdict and
 * the number of listing-like elements) and saves a full-page screenshot of
 * every page that does not look like a 404. Finishes by printing the manual
 * next steps.
 *
 * A failure while probing one URL is logged and does not stop the remaining
 * probes. The browser is always closed, even when page setup fails after
 * the launch succeeded.
 *
 * @returns {Promise<void>} Resolves once every URL has been probed and the
 *   browser has been closed.
 * @throws Rejects if the browser cannot be launched, the page cannot be
 *   created/configured, or the browser fails to close.
 */
async function findUrls() {
  const browser = await puppeteer.launch({
    headless: "new",
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    console.log('🔍 Testing Xiaozhu Minsu URLs...\n');

    const urlsToTest = [
      'https://minsu.xiaozhu.com/',
      'https://minsu.xiaozhu.com/search',
      'https://minsu.xiaozhu.com/shanghai',
      'https://minsu.xiaozhu.com/city/shanghai',
      'https://www.xiaozhu.com/search/',
      'https://www.xiaozhu.com/shanghai/'
    ];

    // Probe sequentially: all URLs share the single page created above.
    for (const url of urlsToTest) {
      try {
        await probeUrl(page, url);
      } catch (err) {
        // Best effort: report the failure and move on to the next candidate.
        console.log(` ❌ Error: ${err.message}\n`);
      }
    }
  } finally {
    // Always release the browser process, including when setup above throws.
    await browser.close();
  }

  console.log('\n💡 Next steps:');
  console.log('1. Use Firefox to manually navigate to minsu.xiaozhu.com');
  console.log('2. Search for "上海 徐汇区"');
  console.log('3. Copy the final URL from address bar');
  console.log('4. Update xiaozhu_minsu_scraper.js with the correct URL pattern');
}

/**
 * Load one candidate URL in `page`, log what was found, and take a full-page
 * screenshot when the page does not look like a 404.
 *
 * A page counts as "404" when the server answered with HTTP status 404 or
 * when the body text contains "404" or "找不到" (text heuristic for pages
 * that report a missing resource without a 404 status).
 *
 * @param {object} page - Puppeteer page used for the navigation.
 * @param {string} url - Candidate URL to test.
 * @returns {Promise<void>} Resolves when logging (and the optional
 *   screenshot) are done.
 * @throws Rejects on navigation timeout/failure, evaluation errors, or a
 *   failed screenshot; the caller decides how to report it.
 */
async function probeUrl(page, url) {
  console.log(`Testing: ${url}`);

  const response = await page.goto(url, {
    waitUntil: 'networkidle2',
    timeout: 15000
  });
  // goto() does not throw on HTTP error codes and may resolve with null,
  // so read the status explicitly and allow it to be unknown.
  const status = response ? response.status() : null;

  // Extra fixed pause after network idle; presumably lets late client-side
  // rendering finish before the DOM is inspected.
  await new Promise(resolve => setTimeout(resolve, 2000));

  // Runs inside the browser page, so it must not reference outer variables.
  const info = await page.evaluate(() => {
    const bodyText = document.body.textContent;
    return {
      url: window.location.href,
      title: document.title,
      has404: bodyText.includes('404') || bodyText.includes('找不到'),
      hasListings: document.querySelectorAll('[class*="room"], [class*="house"], [class*="card"]').length,
      // Collected for inspection; not printed below.
      bodyClasses: document.body.className,
      firstDivClasses: document.querySelector('div')?.className || 'none'
    };
  });

  const isNotFound = status === 404 || info.has404;

  console.log(` → Final URL: ${info.url}`);
  console.log(` → Title: ${info.title}`);
  console.log(` → HTTP status: ${status ?? 'unknown'}`);
  console.log(` → Has 404: ${isNotFound}`);
  console.log(` → Listing elements: ${info.hasListings}`);

  if (!isNotFound) {
    console.log(` ✅ VALID PAGE!`);
    // Timestamp keeps screenshots from different URLs from overwriting each other.
    const filename = `./xiaozhu_test_${Date.now()}.png`;
    await page.screenshot({ path: filename, fullPage: true });
    console.log(` 📸 Screenshot: ${filename}`);
  } else {
    console.log(` ❌ 404 page`);
  }
  console.log('');
}
// Script entry point: run the finder. On an unexpected failure, log the error
// and mark the process as failed so callers can detect it via the exit code
// (previously the script exited with status 0 even after a fatal error).
findUrls().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});