const puppeteer = require('puppeteer');

/**
 * Xiaozhu URL Finder - Figure out the correct URL structure
 */

/**
 * Load one candidate URL, print what was found there, and save a full-page
 * screenshot when the page does not look like a 404.
 *
 * @param {object} page - Puppeteer page used for the navigation.
 * @param {string} url - Address to test.
 * @returns {Promise<void>} Resolves after the report for this URL is printed.
 *   Rejects if navigation, in-page evaluation, or the screenshot fails; the
 *   caller decides how to report that.
 */
async function probeUrl(page, url) {
  console.log(`Testing: ${url}`);

  await page.goto(url, { waitUntil: 'networkidle2', timeout: 15000 });
  // Fixed 2 s pause after network idle — presumably to let client-side
  // rendering settle before the DOM is inspected.
  await new Promise((resolve) => setTimeout(resolve, 2000));

  // Runs inside the browser page; only the returned plain object comes back.
  const info = await page.evaluate(() => {
    const bodyText = document.body.textContent;
    return {
      url: window.location.href,
      title: document.title,
      // Heuristic: the page counts as a 404 when its text mentions "404" or
      // the Chinese phrase for "not found".
      has404: bodyText.includes('404') || bodyText.includes('找不到'),
      hasListings: document.querySelectorAll(
        '[class*="room"], [class*="house"], [class*="card"]'
      ).length,
      // Collected for inspection; not currently printed below.
      bodyClasses: document.body.className,
      firstDivClasses: document.querySelector('div')?.className || 'none'
    };
  });

  console.log(` → Final URL: ${info.url}`);
  console.log(` → Title: ${info.title}`);
  console.log(` → Has 404: ${info.has404}`);
  console.log(` → Listing elements: ${info.hasListings}`);

  if (!info.has404) {
    console.log(` ✅ VALID PAGE!`);

    // Take screenshot
    const filename = `./xiaozhu_test_${Date.now()}.png`;
    await page.screenshot({ path: filename, fullPage: true });
    console.log(` 📸 Screenshot: ${filename}`);
  } else {
    console.log(` ❌ 404 page`);
  }

  console.log('');
}

/**
 * Launch a headless browser, probe a fixed list of candidate Xiaozhu URLs one
 * after another, and print manual follow-up steps at the end.
 *
 * A failure on one URL is logged and does not stop the remaining probes.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed and the
 *   follow-up steps have been printed.
 */
async function findUrls() {
  const browser = await puppeteer.launch({
    headless: "new",
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    console.log('🔍 Testing Xiaozhu Minsu URLs...\n');

    const urlsToTest = [
      'https://minsu.xiaozhu.com/',
      'https://minsu.xiaozhu.com/search',
      'https://minsu.xiaozhu.com/shanghai',
      'https://minsu.xiaozhu.com/city/shanghai',
      'https://www.xiaozhu.com/search/',
      'https://www.xiaozhu.com/shanghai/'
    ];

    // Sequential on purpose: every probe reuses the same page.
    for (const url of urlsToTest) {
      try {
        await probeUrl(page, url);
      } catch (err) {
        console.log(` ❌ Error: ${err.message}\n`);
      }
    }
  } finally {
    // Always shut the browser down, even if page setup above threw; otherwise
    // the spawned browser process is left running.
    await browser.close();
  }

  console.log('\n💡 Next steps:');
  console.log('1. Use Firefox to manually navigate to minsu.xiaozhu.com');
  console.log('2. Search for "上海 徐汇区"');
  console.log('3. Copy the final URL from address bar');
  console.log('4. Update xiaozhu_minsu_scraper.js with the correct URL pattern');
}

findUrls().catch(console.error);