const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Return the first selector (and its element handle) that matches on the page.
 *
 * A selector whose lookup throws is reported and skipped so that the
 * remaining candidates are still tried.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {string[]} selectors - Candidate CSS selectors, in priority order.
 * @returns {Promise<{selector: string, element: object}|null>} First match, or null.
 */
async function findFirstElement(page, selectors) {
  for (const selector of selectors) {
    try {
      const element = await page.$(selector);
      if (element) {
        return { selector, element };
      }
    } catch (err) {
      console.warn(`   Selector lookup failed (${selector}): ${err.message}`);
    }
  }
  return null;
}

/**
 * Click the first element matched by one of the selectors, then pause 3 s.
 *
 * If looking up or clicking a candidate throws, the failure is reported and
 * the next selector is tried.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {string[]} selectors - Candidate CSS selectors, in priority order.
 * @returns {Promise<string|null>} The selector that was clicked, or null if none was.
 */
async function clickFirstAvailable(page, selectors) {
  for (const selector of selectors) {
    try {
      const button = await page.$(selector);
      if (button) {
        console.log(`🖱️ Clicking search button: ${selector}`);
        await button.click();
        await sleep(3000);
        return selector;
      }
    } catch (err) {
      console.warn(`   Could not click ${selector}: ${err.message}`);
    }
  }
  return null;
}

/**
 * Save a screenshot into the current directory and record its file name.
 *
 * @param {object} page - Puppeteer page to capture.
 * @param {string} fileName - File name (written as `./<fileName>`).
 * @param {string[]} taken - Accumulator of file names saved so far.
 * @param {object} [options] - Extra options passed to `page.screenshot`.
 * @returns {Promise<void>}
 */
async function saveScreenshot(page, fileName, taken, options = {}) {
  await page.screenshot({ path: `./${fileName}`, ...options });
  taken.push(fileName);
}

/**
 * Runs inside the browser page (passed to `page.evaluate`), so it must not
 * reference anything from the Node.js scope.
 *
 * Tries a list of candidate selectors and, for the first one matching more
 * than 2 and fewer than 100 elements, returns a summary of its first 5 items.
 *
 * @returns {Array<{selector: string, text: string, price?: string, title?: string, url?: string}>}
 */
function collectListingsInPage() {
  const maxSamples = 5;
  const pricePattern = /[¥¥]?\d+[元\/]/;
  const possibleSelectors = [
    '.room-item',
    '.house-item',
    '.list-item',
    '.result-item',
    '[class*="room"]',
    '[class*="house"]',
    '[class*="listing"]',
  ];
  const looksLikePrice = (el) => pricePattern.test(el.textContent);

  for (const selector of possibleSelectors) {
    const items = document.querySelectorAll(selector);
    // Too few matches is probably not a result list; too many is probably a
    // generic class that matches unrelated elements.
    if (items.length <= 2 || items.length >= 100) {
      continue;
    }

    return Array.from(items)
      .slice(0, maxSamples)
      .map((item) => {
        const result = {
          selector,
          text: item.textContent.substring(0, 200).trim(),
        };

        // Prefer an element with "price" in its class. Otherwise take the
        // innermost element whose text looks like a price: a plain "first
        // match" would pick an outer wrapper, because a wrapper's textContent
        // contains the text of all of its descendants.
        const priceEl =
          item.querySelector('[class*="price"]') ||
          Array.from(item.querySelectorAll('*')).find(
            (el) => looksLikePrice(el) && !Array.from(el.children).some(looksLikePrice),
          );
        if (priceEl) {
          result.price = priceEl.textContent.trim();
        }

        const titleEl = item.querySelector('h2, h3, h4, [class*="title"]');
        if (titleEl) {
          result.title = titleEl.textContent.trim();
        }

        const linkEl = item.querySelector('a');
        if (linkEl) {
          result.url = linkEl.href;
        }

        return result;
      });
  }

  return [];
}

/**
 * Xiaozhu Navigator - Try to navigate from homepage to search results.
 *
 * Loads the Xiaozhu homepage, tries to run a search for "上海徐汇" (or, when no
 * search input is found, follows the first Shanghai-related link), then
 * extracts a sample of listings from the resulting page.
 *
 * Files written to the current directory: screenshots (`xiaozhu_home.png`,
 * `xiaozhu_search_typed.png` when a search input was found,
 * `xiaozhu_final.png`) and either `xiaozhu_listings_found.json` or, when no
 * listings are found, `xiaozhu_page.html`.
 *
 * Errors raised after the browser has launched are logged and mark the
 * process as failed (`process.exitCode = 1`); the browser is always closed.
 *
 * @returns {Promise<void>}
 */
async function navigateXiaozhu() {
  console.log('🚀 Launching browser to navigate Xiaozhu...');

  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: { width: 1920, height: 1080 },
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const screenshots = [];

  try {
    // Page setup lives inside the try block so that a failure here still
    // reaches the finally clause and closes the browser.
    const page = await browser.newPage();

    // Set user agent to avoid bot detection
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    console.log('📡 Loading homepage...');
    await page.goto('https://www.xiaozhu.com/', {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    console.log(`✅ Loaded: ${page.url()}`);

    // Wait for content to load
    await sleep(3000);

    await saveScreenshot(page, 'xiaozhu_home.png', screenshots, { fullPage: true });
    console.log('📸 Homepage screenshot saved');

    // Try to find search input and search for Shanghai
    console.log('\n🔍 Looking for search functionality...');

    // Common selectors for a search box, most specific first
    const searchSelectors = [
      'input[placeholder*="搜索"]',
      'input[placeholder*="目的地"]',
      'input[placeholder*="城市"]',
      'input.search',
      '#search',
      '.search-input',
      'input[type="text"]',
    ];

    const search = await findFirstElement(page, searchSelectors);

    if (search) {
      console.log(`✅ Found search input: ${search.selector}`);

      // Try to search for Shanghai Xuhui
      console.log('⌨️ Typing search query...');
      await search.element.click();
      await search.element.type('上海徐汇', { delay: 100 });
      await sleep(1000);

      await saveScreenshot(page, 'xiaozhu_search_typed.png', screenshots);
      console.log('📸 Search typed screenshot saved');

      // Look for suggestions or submit button
      const submitSelectors = [
        'button[type="submit"]',
        '.search-button',
        '.el-button--primary',
        'button.submit',
      ];
      const clicked = await clickFirstAvailable(page, submitSelectors);
      if (clicked === null) {
        console.log('   No search button could be clicked; staying on the current page');
      }
    } else {
      // No search input found, try to find direct links to Shanghai
      console.log('❌ No search input found, looking for Shanghai links...');

      const links = await page.evaluate(() => {
        const allLinks = Array.from(document.querySelectorAll('a'));
        return allLinks
          .filter((a) =>
            a.textContent.includes('上海') ||
            a.textContent.includes('Shanghai') ||
            a.href.includes('shanghai'))
          .map((a) => ({
            text: a.textContent.trim().substring(0, 50),
            href: a.href,
          }))
          .slice(0, 10);
      });

      console.log('\n🔗 Found Shanghai-related links:');
      links.forEach((link, i) => {
        console.log(` ${i + 1}. ${link.text} → ${link.href}`);
      });

      if (links.length > 0) {
        const firstLink = links[0].href;
        console.log(`\n🖱️ Navigating to: ${firstLink}`);
        await page.goto(firstLink, { waitUntil: 'networkidle2' });
        await sleep(3000);
      }
    }

    // Extract current URL and page info
    const currentUrl = page.url();
    console.log(`\n📍 Current URL: ${currentUrl}`);

    await saveScreenshot(page, 'xiaozhu_final.png', screenshots, { fullPage: true });
    console.log('📸 Final screenshot saved');

    // Extract listings if any
    console.log('\n🔍 Extracting listings from current page...');
    const listings = await page.evaluate(collectListingsInPage);

    if (listings.length > 0) {
      console.log(`\n✅ Found ${listings.length} listings!`);
      listings.forEach((listing, i) => {
        console.log(`\n Listing ${i + 1}:`);
        if (listing.title) console.log(` Title: ${listing.title}`);
        if (listing.price) console.log(` Price: ${listing.price}`);
        if (listing.url) console.log(` URL: ${listing.url}`);
      });

      // Save listings
      fs.writeFileSync('./xiaozhu_listings_found.json', JSON.stringify(listings, null, 2));
      console.log('\n💾 Listings saved to xiaozhu_listings_found.json');
    } else {
      console.log('\n❌ No listings found on this page');

      // Save page HTML for manual inspection
      const html = await page.content();
      fs.writeFileSync('./xiaozhu_page.html', html);
      console.log('💾 Page HTML saved to xiaozhu_page.html for manual inspection');
    }

    // Log the final URL pattern for future use
    console.log('\n📋 SUMMARY:');
    console.log(` Final URL: ${currentUrl}`);
    console.log(` Listings found: ${listings.length}`);
    console.log(` Screenshots: ${screenshots.join(', ')}`);

    // Drop the query string and fragment to expose the reusable path
    const urlPattern = currentUrl.split(/[?#]/)[0];
    console.log(` \n💡 URL pattern to use: ${urlPattern}`);
  } catch (err) {
    console.error('❌ Error:', err.message);
    // Signal failure to the calling shell instead of exiting with status 0
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}

navigateXiaozhu().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});