const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Xiaozhu Click URLs - Based on working xiaozhu_fixed.js
 *
 * Searches minsu.xiaozhu.com for CONFIG.searchQuery, clicks each listing on
 * the results page to read the URL it navigates to, and writes the collected
 * entries to ./xiaozhu_urls.json.
 */

const CONFIG = {
  // Fallback text used when scoring search suggestions.
  city: '上海',
  searchQuery: '交通大学',
  latitude: 31.1880,
  longitude: 121.4367,
  maxListings: 10,
  // Length of stay (in days) used for the total-price estimate in the summary.
  stayDays: 29,
};

const SITE_URL = 'https://minsu.xiaozhu.com/';
const SITE_ORIGIN = 'https://minsu.xiaozhu.com';
const OUTPUT_FILE = './xiaozhu_urls.json';

console.log('🔗 Xiaozhu URL Extractor (Click Method)');
console.log(`📍 Search: ${CONFIG.searchQuery}`);
console.log(`🎯 Will click up to ${CONFIG.maxListings} listings\n`);

/**
 * Resolve after a fixed delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the delay has elapsed.
 */
function wait(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Type the search query into the first text input and click the best-scoring
 * visible suggestion; fall back to pressing Enter when nothing matches.
 * Does nothing when the page has no text input.
 *
 * @param {object} page - Puppeteer page showing the site homepage.
 * @returns {Promise<void>}
 */
async function runSearch(page) {
  const searchInput = await page.$('input[type="text"]');
  if (!searchInput) {
    return;
  }

  console.log('⌨️ Typing search query...');
  await searchInput.click();
  await wait(500);

  // Select-all followed by Backspace, intended to clear any pre-filled text.
  await page.keyboard.down('Control');
  await page.keyboard.press('A');
  await page.keyboard.up('Control');
  await page.keyboard.press('Backspace');

  await searchInput.type(CONFIG.searchQuery, { delay: 150 });
  await wait(2000);

  console.log('👆 Clicking suggestion...');
  const clicked = await page.evaluate((searchQuery, city) => {
    const candidates = [];
    for (const el of document.querySelectorAll('div, li, a, span')) {
      const text = el.textContent.trim();
      const rect = el.getBoundingClientRect();
      // Zero-sized elements are skipped.
      if (rect.width <= 0 || rect.height <= 0) {
        continue;
      }
      if (text.includes(searchQuery)) {
        candidates.push({ el, text, score: 100 });
      } else if (text.includes(city) && text.length < 15) {
        candidates.push({ el, text, score: 30 });
      }
    }

    // NOTE(review): the sort is stable, so among equal scores the first
    // element in document order is clicked — confirm that is the intended
    // suggestion element rather than an enclosing container.
    candidates.sort((a, b) => b.score - a.score);
    if (candidates.length === 0) {
      return false;
    }
    console.log(`Clicking: "${candidates[0].text}"`);
    candidates[0].el.click();
    return true;
  }, CONFIG.searchQuery, CONFIG.city);

  if (clicked) {
    console.log('✅ Clicked suggestion\n');
    await wait(4000);
  } else {
    console.log('⚠️ No suggestion, pressing Enter\n');
    await page.keyboard.press('Enter');
    await wait(3000);
  }
}

/**
 * Read title, price and image of the `.list-item` element at the given index.
 *
 * @param {object} page - Puppeteer page showing the results list.
 * @param {number} index - Zero-based position among `.list-item` elements.
 * @returns {Promise<?{title: string, price: string, priceNum: number, image: (string|undefined)}>}
 *   The extracted fields, or null when no element exists at that index.
 */
function readListingInfo(page, index) {
  return page.evaluate((i) => {
    const item = document.querySelectorAll('.list-item')[i];
    if (!item) {
      return null;
    }
    const priceEl = item.querySelector('.list-price');
    return {
      title: item.querySelector('.list-title')?.textContent.trim() || 'No title',
      price: priceEl?.textContent.trim() || 'No price',
      // First run of digits in the price text; 0 when there is none.
      priceNum: Number.parseInt(priceEl?.textContent.match(/\d+/)?.[0] || '0', 10),
      image: item.querySelector('img')?.src,
    };
  }, index);
}

/**
 * Click the `.list-item` element at the given index, if it exists.
 *
 * @param {object} page - Puppeteer page showing the results list.
 * @param {number} index - Zero-based position among `.list-item` elements.
 * @returns {Promise<void>}
 */
function clickListing(page, index) {
  return page.evaluate((i) => {
    const item = document.querySelectorAll('.list-item')[i];
    if (item) {
      item.click();
    }
  }, index);
}

/**
 * Print the collected entries, including a total-price estimate for a stay
 * of CONFIG.stayDays days.
 *
 * @param {Array<object>} results - Entries collected by extractURLs.
 * @returns {void}
 */
function printSummary(results) {
  console.log('='.repeat(70));
  console.log('📋 EXTRACTED URLS:\n');
  for (const r of results) {
    console.log(`${r.index}. ${r.title?.substring(0, 70)}`);
    console.log(
      ` 💰 ${r.price} (¥${r.priceNum}/day × ${CONFIG.stayDays} days = ¥${r.priceNum * CONFIG.stayDays})`
    );
    console.log(` 🔗 ${r.url}`);
    console.log('');
  }
  console.log('='.repeat(70));
}

/**
 * Launch a browser, run the search, click up to CONFIG.maxListings results
 * and save `{ index, title, price, priceNum, image, url }` entries to
 * OUTPUT_FILE. `url` is null when clicking a listing did not change the
 * page URL.
 *
 * Errors inside the run are logged (with a best-effort screenshot) rather
 * than rethrown; the browser is always closed.
 *
 * @returns {Promise<void>}
 */
async function extractURLs() {
  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: { width: 414, height: 896 },
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Declared outside the try so the error handler can screenshot it, while
  // page setup itself is still covered by the finally that closes the browser.
  let page;

  try {
    page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15');

    const context = browser.defaultBrowserContext();
    await context.overridePermissions(SITE_ORIGIN, ['geolocation']);
    await page.setGeolocation({
      latitude: CONFIG.latitude,
      longitude: CONFIG.longitude,
      accuracy: 100,
    });
    console.log('🌍 Geolocation set to Shanghai Xujiahui\n');

    // Navigate and search (same as xiaozhu_fixed.js)
    console.log('🌐 Loading homepage...');
    await page.goto(SITE_URL, { waitUntil: 'networkidle2', timeout: 30000 });
    await wait(3000);

    await runSearch(page);

    // Scroll to load listings
    console.log('⏬ Scrolling to load all listings...');
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await wait(5000);

    // Get listing count
    const listingCount = await page.evaluate(
      () => document.querySelectorAll('.list-item').length
    );
    console.log(`📊 Found ${listingCount} listings\n`);

    if (listingCount === 0) {
      console.log('❌ No listings found');
      await page.screenshot({ path: './xiaozhu_click_no_listings.png' });
      // The finally block below closes the browser.
      return;
    }

    const results = [];
    const total = Math.min(listingCount, CONFIG.maxListings);
    // URL of the results page; used to tell whether a click navigated away.
    const listUrl = page.url();

    // Click each listing to get URL
    for (let i = 0; i < total; i++) {
      console.log(`🔍 Listing ${i + 1}/${total}...`);

      try {
        // Extract info before clicking
        const info = await readListingInfo(page, i);
        if (!info) {
          console.log(' ⚠️ Could not extract info\n');
          continue;
        }

        console.log(` 📝 ${info.title.substring(0, 60)}...`);
        console.log(` 💰 ${info.price}`);

        await clickListing(page, i);
        console.log(' 👆 Clicked, waiting for page load...');
        await wait(5000); // Wait for new page to load

        const currentUrl = page.url();
        const navigated = currentUrl !== listUrl;

        if (navigated) {
          console.log(` 🔗 URL: ${currentUrl}`);
        } else {
          console.log(' ⚠️ Click did not change the page URL');
        }

        results.push({ index: i + 1, ...info, url: navigated ? currentUrl : null });

        // Only go back when the click actually left the results page;
        // otherwise goBack would leave the list itself.
        if (navigated) {
          console.log(' ⬅️ Going back...');
          await page.goBack({ waitUntil: 'networkidle2', timeout: 15000 });
          await wait(2000);
        }
        console.log('');
      } catch (err) {
        console.log(` ❌ Error: ${err.message}`);

        // Try to recover: return to the results page if we are not on it.
        try {
          if (page.url() !== listUrl) {
            console.log(' 🔄 Reloading list page...');
            await page.goBack();
            await wait(3000);
          }
        } catch (recoveryErr) {
          console.log(' ⚠️ Recovery failed, continuing...\n');
        }
      }
    }

    // Save results
    fs.writeFileSync(OUTPUT_FILE, JSON.stringify(results, null, 2));
    console.log(`\n💾 Saved ${results.length} URLs to ${OUTPUT_FILE}\n`);

    // Print summary
    printSummary(results);
  } catch (err) {
    console.error('❌ Fatal error:', err.message);
    // Best-effort screenshot: it must not mask the original error or fail
    // when the page was never created.
    if (page) {
      try {
        await page.screenshot({ path: './xiaozhu_click_error.png' });
      } catch (screenshotErr) {
        console.error('⚠️ Could not save error screenshot:', screenshotErr.message);
      }
    }
  } finally {
    await browser.close();
  }
}

extractURLs().catch(console.error);