const puppeteer = require('puppeteer');

// Listing detail page to inspect; the listing id and stay dates are fixed in the query string.
const url = 'https://minsu.xiaozhu.com/detail?luId=354701406371854&startDate=2025-12-21&endDate=2025-12-22';

// Where the full-page screenshot is written (relative to the working directory).
const SCREENSHOT_PATH = './xiaozhu_detail_page.png';
// Upper bound for page.goto() before it rejects.
const NAVIGATION_TIMEOUT_MS = 30000;
// Fixed pause after navigation, before the page is captured and scraped.
const SETTLE_DELAY_MS = 5000;

/**
 * Scrapes listing details out of the current document.
 *
 * This function is handed to `page.evaluate()`, so it executes inside the
 * browser page, not in Node. It must therefore stay fully self-contained:
 * it may not reference any variable or helper declared outside its own body,
 * and it must return only JSON-serializable values (hence arrays, not Sets).
 *
 * @returns {{title: string, address: string, location: string,
 *            nearbyLandmarks: string[], metro: string[], price: string,
 *            amenities: string[], fullText: string}}
 *   Empty strings / empty arrays are returned for anything not found.
 */
function extractListingInfo() {
  const result = {
    title: '',
    address: '',
    location: '',
    nearbyLandmarks: [],
    metro: [],
    price: '',
    amenities: [],
    fullText: '',
  };

  // Title: first element that is an <h1>, has class "title", or has "title" in its class.
  const titleEl = document.querySelector('h1, .title, [class*="title"]');
  if (titleEl) result.title = titleEl.textContent.trim();

  // Raw page text, used both for the regex search and for the debug preview.
  const bodyText = document.body.textContent;
  result.fullText = bodyText.substring(0, 2000);

  // Address patterns, tried in order; the first one that matches wins.
  // The colon class accepts both the full-width (:) and the ASCII (:) colon.
  // A separate "详细地址:" pattern is unnecessary: any text it would match
  // is already matched by the "地址:" pattern below.
  const addressPatterns = [
    /地址[::]\s*(.{5,50})/,
    /位于[::]?\s*(.{5,50})/,
    /([^,。]{2,}路\d+号[^,。]{0,20})/,
    /([^,。]{2,}街\d+号[^,。]{0,20})/,
  ];
  for (const pattern of addressPatterns) {
    const match = bodyText.match(pattern);
    if (match && match[1]) {
      result.address = match[1].trim();
      break;
    }
  }

  const containsAny = (text, keywords) => keywords.some((keyword) => text.includes(keyword));

  // Nested elements report the same textContent as their single-child
  // parents, so collect into Sets to avoid listing one snippet several times.
  const metro = new Set();
  const nearbyLandmarks = new Set();
  const amenities = new Set();

  for (const el of document.querySelectorAll('div, span, p')) {
    const text = el.textContent.trim();

    // No rule below accepts empty text or text of 100+ characters, so skip
    // large container elements before doing any keyword scanning.
    if (text.length === 0 || text.length >= 100) continue;

    // Location hint: a short snippet mentioning "address" or "located at".
    // Later matches overwrite earlier ones, so the last one in document order is kept.
    if (text.length > 5 && containsAny(text, ['地址', '位于'])) {
      result.location = text;
    }

    // Metro: snippets mentioning "metro" or "line N".
    if (text.length < 50 && containsAny(text, ['地铁', '号线'])) {
      metro.add(text);
    }

    // Landmarks: hospital, park, shopping mall.
    if (text.length < 30 && containsAny(text, ['医院', '公园', '商场'])) {
      nearbyLandmarks.add(text);
    }

    // Amenities: kitchen, refrigerator, washing machine.
    if (text.length < 20 && containsAny(text, ['厨房', '冰箱', '洗衣机'])) {
      amenities.add(text);
    }
  }

  result.metro = [...metro];
  result.nearbyLandmarks = [...nearbyLandmarks];
  result.amenities = [...amenities];

  // Price: first element whose class attribute contains "price".
  const priceEl = document.querySelector('[class*="price"]');
  if (priceEl) result.price = priceEl.textContent.trim();

  return result;
}

/**
 * Prints the scraped listing details to stdout.
 *
 * @param {ReturnType<typeof extractListingInfo>} info - Result of the in-page extraction.
 */
function printListingInfo(info) {
  console.log('📋 LISTING INFORMATION:\n');
  console.log(`Title: ${info.title || 'Not found'}`);
  console.log(`\nPrice: ${info.price || 'Not found'}`);
  console.log(`\nAddress: ${info.address || 'Not found in structured format'}`);
  console.log(`\nLocation Info: ${info.location || 'Not found'}`);

  if (info.metro.length > 0) {
    console.log(`\nMetro: ${info.metro.slice(0, 3).join(', ')}`);
  }
  if (info.nearbyLandmarks.length > 0) {
    console.log(`\nNearby: ${info.nearbyLandmarks.slice(0, 5).join(', ')}`);
  }
  if (info.amenities.length > 0) {
    console.log(`\nAmenities: ${info.amenities.slice(0, 5).join(', ')}`);
  }

  console.log('\n\n📄 PAGE TEXT PREVIEW (first 500 chars):\n');
  console.log(info.fullText.substring(0, 500));
  console.log('\n\n💡 Check xiaozhu_detail_page.png for full page screenshot');
}

/**
 * Loads the listing page in headless Chromium, saves a full-page screenshot,
 * and prints whatever listing details can be scraped from the DOM.
 *
 * Errors raised while working with the page are logged rather than rethrown;
 * the browser is always closed once it has been launched.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: "new",
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    // Page setup lives inside the try so a failure here still closes the browser.
    const page = await browser.newPage();
    // Present as a mobile Safari client.
    await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15');

    console.log('🌐 Loading page...');
    await page.goto(url, { waitUntil: 'networkidle2', timeout: NAVIGATION_TIMEOUT_MS });
    // Extra fixed wait after network idle before capturing the page.
    await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));

    console.log('📸 Taking screenshot...');
    await page.screenshot({ path: SCREENSHOT_PATH, fullPage: true });

    console.log('🔍 Extracting information...\n');
    const info = await page.evaluate(extractListingInfo);

    printListingInfo(info);
  } catch (err) {
    console.error('❌ Error:', err.message);
  } finally {
    await browser.close();
  }
}

console.log('📍 Extracting address from listing...\n');

// Failures outside main()'s try block (browser launch, browser.close) land
// here instead of surfacing as an unhandled promise rejection.
main().catch((err) => {
  console.error('❌ Error:', err.message);
  process.exitCode = 1;
});