const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Xiaozhu scraper - apartment rental near the Jiaoda Xujiahui campus.
 *
 * Dates:    24 Dec 2025 -> 22 Jan 2026
 * Budget:   3000-5000 RMB/month (ideally 3000-4000)
 * Criteria: kitchen + fridge required, washing machine is a bonus
 *
 * Usage:
 *   LOGIN_MODE=true node xiaozhu_scraper.js   # log in manually, save cookies
 *   node xiaozhu_scraper.js                   # scrape using the saved cookies
 */

const CONFIG = {
  // Rental dates (only echoed in the markdown report; not sent to the site)
  checkIn: '2025-12-24',
  checkOut: '2026-01-22',

  // Budget (RMB/month)
  budgetMin: 0,
  budgetMax: 5000,
  budgetIdeal: 4000,

  // Location (Xujiahui campus)
  targetDistrict: '徐汇区', // Xuhui District
  targetMetroLines: [1, 7, 9, 10, 11], // Reachable lines (informational, not used for filtering)
  maxMetroTime: 25, // max minutes (informational, not used for filtering)

  // Amenities (informational: the filter in rankListings hard-codes kitchen + fridge)
  required: ['kitchen', 'fridge'],
  bonus: ['washingMachine', 'metro'],

  // Output
  outputFile: './xiaozhu_results.json',
  outputMarkdown: './xiaozhu_results.md',
  topN: 20
};

// Metro stations near the campus (Xujiahui).
// NOTE(review): `minutes` is presumably the travel time from the station to
// the campus - confirm the values before relying on the ranking.
const PREFERRED_STATIONS = [
  { name: '交通大学', lines: [10, 11], minutes: 0 },
  { name: '徐家汇', lines: [1, 9, 11], minutes: 5 },
  { name: '衡山路', lines: [1], minutes: 10 },
  { name: '常熟路', lines: [1, 7], minutes: 10 },
  { name: '上海体育馆', lines: [1, 4], minutes: 15 },
  { name: '龙华', lines: [11, 12], minutes: 15 },
  { name: '漕河泾开发区', lines: [9], minutes: 20 },
  { name: '七宝', lines: [9], minutes: 25 }
];

// Single source of truth for the cookie jar written by saveCookies and read
// by loadCookies.
const COOKIES_FILE = './xiaozhu_cookies.json';

/**
 * Reads the saved session cookies from COOKIES_FILE.
 *
 * @returns {Promise<Array<object>|null>} The parsed cookie array, or null when
 *   the file is missing, unreadable, or does not contain a JSON array (login
 *   instructions are printed in that case).
 */
async function loadCookies() {
  try {
    const cookies = JSON.parse(fs.readFileSync(COOKIES_FILE, 'utf8'));
    // The caller spreads the result into page.setCookie(), so it must be an array.
    if (!Array.isArray(cookies)) {
      throw new Error('cookie file does not contain a JSON array');
    }
    return cookies;
  } catch (err) {
    if (err.code === 'ENOENT') {
      console.log('❌ Cookies not found. Please login first and save cookies.');
    } else {
      // Distinguish a corrupt/unreadable file from a missing one.
      console.log(`❌ Could not load cookies from ${COOKIES_FILE}: ${err.message}`);
    }
    console.log('Instructions:');
    console.log('1. Run this script with LOGIN_MODE=true');
    console.log('2. Login manually when browser opens');
    console.log('3. Press Enter when done to save cookies');
    return null;
  }
}

/**
 * Writes the cookies of the given page to COOKIES_FILE as pretty-printed JSON.
 *
 * @param {object} page - Puppeteer page whose cookies are saved.
 * @returns {Promise<void>}
 */
async function saveCookies(page) {
  const cookies = await page.cookies();
  fs.writeFileSync(COOKIES_FILE, JSON.stringify(cookies, null, 2));
  console.log('✅ Cookies saved to xiaozhu_cookies.json');
}

/**
 * Opens a visible browser on the Xiaozhu home page, waits for the user to log
 * in and press Enter in the terminal, then saves the session cookies.
 *
 * @returns {Promise<void>}
 */
async function loginMode() {
  console.log('🔐 LOGIN MODE - Manual login required');

  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://www.xiaozhu.com');

    console.log('📝 Please login manually in the browser...');
    console.log('⏸️ Press Enter when you are logged in');

    // Wait for user input
    await new Promise((resolve) => {
      process.stdin.once('data', resolve);
    });
    // Attaching a 'data' listener switches stdin to flowing mode; pause it
    // again so the open stream does not keep the process alive afterwards.
    process.stdin.pause();

    await saveCookies(page);
  } finally {
    // Always release the browser, even if navigation or saving failed.
    await browser.close();
  }

  console.log('✅ Login complete! Run the script again without LOGIN_MODE');
}

/**
 * Extracts the listing cards from the currently loaded search page.
 *
 * The callback runs inside the browser, so it must be self-contained: the
 * station table is passed in as a serializable argument.
 *
 * @param {object} page - Puppeteer page showing the search results.
 * @returns {Promise<Array<object>>} One plain object per parsed card.
 */
async function extractListings(page) {
  return page.evaluate((stations) => {
    const results = [];

    // This selector needs to be updated based on actual Xiaozhu HTML structure
    const cards = document.querySelectorAll('.result_list .result_item');

    cards.forEach((card) => {
      try {
        const textOf = (selector) =>
          card.querySelector(selector)?.textContent?.trim() || '';

        const listing = {
          title: textOf('.result_title'),
          price: textOf('.result_price'),
          priceNum: 0, // Parsed from the price string below
          location: textOf('.result_address'),
          url: card.querySelector('a')?.href || '',
          image: card.querySelector('img')?.src || '',

          // Equipment flags (need to inspect actual HTML)
          hasKitchen: false,
          hasFridge: false,
          hasWashingMachine: false,
          hasMetro: false,

          // Metro info
          nearestStation: '',
          metroLines: [],
          estimatedMetroTime: 999
        };

        // Parse price (format: "3500元/月" or similar). Thousands separators
        // are stripped first so "3,500" is read as 3500 rather than 3.
        const priceMatch = listing.price.replace(/,/g, '').match(/\d+/);
        if (priceMatch) {
          listing.priceNum = Number.parseInt(priceMatch[0], 10);
        }

        // Check for equipment keywords in description
        const fullText = card.textContent.toLowerCase();
        listing.hasKitchen = fullText.includes('厨房') || fullText.includes('kitchen');
        listing.hasFridge = fullText.includes('冰箱') || fullText.includes('fridge');
        listing.hasWashingMachine = fullText.includes('洗衣机') || fullText.includes('washing');
        listing.hasMetro = fullText.includes('地铁') || fullText.includes('metro');

        // Heuristic: if the card mentions one of the preferred stations, use
        // the closest one for the metro estimate; otherwise the 999 sentinel
        // stays in place.
        // NOTE(review): a plain substring match can hit non-station mentions
        // of the same name - verify against real listing text.
        let closest = null;
        stations.forEach((station) => {
          const mentioned = fullText.includes(station.name);
          if (mentioned && (closest === null || station.minutes < closest.minutes)) {
            closest = station;
          }
        });
        if (closest !== null) {
          listing.nearestStation = closest.name;
          listing.metroLines = closest.lines;
          listing.estimatedMetroTime = closest.minutes;
        }

        results.push(listing);
      } catch (err) {
        // Runs in the page context: this is logged to the browser console.
        console.error('Error parsing listing:', err);
      }
    });

    return results;
  }, PREFERRED_STATIONS);
}

/**
 * Computes the ranking score of a listing (higher is better).
 *
 * @param {object} listing - Listing with priceNum, amenity flags and
 *   estimatedMetroTime.
 * @returns {number} Score rounded to one decimal place.
 */
function scoreListing(listing) {
  let score = 0;

  // Price score (ideal range gets bonus)
  if (listing.priceNum <= CONFIG.budgetIdeal) {
    score += (CONFIG.budgetIdeal - listing.priceNum) / 100; // Cheaper = better
  } else {
    score -= (listing.priceNum - CONFIG.budgetIdeal) / 50; // Over ideal = penalty
  }

  // Amenity bonuses
  if (listing.hasWashingMachine) score += 10;
  if (listing.hasMetro) score += 15;

  // Metro time penalty (estimated)
  score -= listing.estimatedMetroTime * 0.5;

  return Math.round(score * 10) / 10;
}

/**
 * Keeps the listings that are within budget and have the must-have amenities,
 * scores them, and returns the best CONFIG.topN sorted by descending score.
 *
 * @param {Array<object>} listings - Raw listings from extractListings.
 * @returns {Array<object>} New listing objects, each with an added `score`.
 */
function rankListings(listings) {
  return listings
    // priceNum === 0 means the price could not be parsed, so it is excluded
    // even when budgetMin is 0.
    .filter((l) => l.priceNum > 0
      && l.priceNum >= CONFIG.budgetMin
      && l.priceNum <= CONFIG.budgetMax)
    .filter((l) => l.hasKitchen && l.hasFridge) // Must-have
    .map((l) => ({ ...l, score: scoreListing(l) }))
    .sort((a, b) => b.score - a.score) // Higher score = better
    .slice(0, CONFIG.topN);
}

/**
 * Prints the first five ranked listings to the console.
 *
 * @param {Array<object>} ranked - Listings already sorted by score.
 * @returns {void}
 */
function printTopListings(ranked) {
  console.log('\n🏆 TOP 5 OPTIONS:');
  ranked.slice(0, 5).forEach((l, i) => {
    console.log(`\n${i + 1}. ${l.title}`);
    console.log(` 💰 ${l.price} (${l.priceNum} RMB)`);
    console.log(` 📍 ${l.location}`);
    console.log(` ⭐ Score: ${l.score}`);
    console.log(` 🔗 ${l.url}`);
  });
}

/**
 * Main scraping flow: loads the saved cookies, opens the search page for the
 * target district, extracts and ranks the listings, then writes the JSON and
 * markdown reports and prints the top five.
 *
 * Returns early (after printing a hint) when no cookies are available.
 *
 * @returns {Promise<void>}
 */
async function scrapXiaozhu() {
  const cookies = await loadCookies();
  if (!cookies) {
    console.log('Run: LOGIN_MODE=true node xiaozhu_scraper.js');
    return;
  }

  const browser = await puppeteer.launch({
    headless: false, // Set to true for production
    defaultViewport: null
  });

  let listings;
  try {
    const page = await browser.newPage();

    // Set cookies
    await page.setCookie(...cookies);

    // Navigate to search page
    // Note: URL structure needs to be determined based on actual Xiaozhu website
    // This is a placeholder - we'll need to inspect the actual site
    const searchUrl = `https://www.xiaozhu.com/search-shanghai-${CONFIG.targetDistrict}/`;

    console.log(`🔍 Searching: ${searchUrl}`);
    await page.goto(searchUrl, { waitUntil: 'networkidle2' });

    // Wait for listings to load
    // Selector needs to be determined by inspecting the actual page.
    // Best effort: on timeout we still try to extract whatever is there.
    await page.waitForSelector('.result_list', { timeout: 10000 }).catch(() => {
      console.log('⚠️ Timeout waiting for listings. Page structure might have changed.');
    });

    listings = await extractListings(page);
  } finally {
    // Always release the browser, even if navigation or extraction failed.
    await browser.close();
  }

  console.log(`📊 Found ${listings.length} listings`);

  // Filter and score
  const filtered = rankListings(listings);

  console.log(`✅ Filtered to ${filtered.length} suitable options`);

  // Save results
  fs.writeFileSync(CONFIG.outputFile, JSON.stringify(filtered, null, 2));

  // Generate markdown table
  const markdown = generateMarkdown(filtered);
  fs.writeFileSync(CONFIG.outputMarkdown, markdown);

  console.log(`💾 Results saved to:`);
  console.log(` - ${CONFIG.outputFile}`);
  console.log(` - ${CONFIG.outputMarkdown}`);

  printTopListings(filtered);
}

/**
 * Builds the markdown report (header, results table, legend) for the ranked
 * listings.
 *
 * @param {Array<object>} listings - Ranked listings with priceNum, location,
 *   amenity flags, score and url.
 * @returns {string} The markdown document.
 */
function generateMarkdown(listings) {
  // A raw "|" or line break inside a cell would break the table layout.
  const escapeCell = (text) => String(text)
    .replace(/\r?\n/g, ' ')
    .replace(/\|/g, '\\|');
  const mark = (flag) => (flag ? '✓' : '✗');

  let md = '# Xiaozhu Search Results - Xujiahui Campus Area\n\n';
  md += `**Search Date:** ${new Date().toLocaleDateString()}\n`;
  md += `**Check-in:** ${CONFIG.checkIn}\n`;
  md += `**Check-out:** ${CONFIG.checkOut}\n`;
  md += `**Budget:** ${CONFIG.budgetMin}-${CONFIG.budgetMax} RMB/month (ideal: ${CONFIG.budgetIdeal})\n\n`;

  md += '| Rank | Price | Location | Kitchen | Fridge | Washer | Metro | Score | Link |\n';
  md += '|------|-------|----------|---------|--------|--------|-------|-------|------|\n';

  listings.forEach((l, i) => {
    md += `| ${i + 1} `;
    md += `| ${l.priceNum} RMB `;
    md += `| ${escapeCell(l.location.substring(0, 30))} `;
    md += `| ${mark(l.hasKitchen)} `;
    md += `| ${mark(l.hasFridge)} `;
    md += `| ${mark(l.hasWashingMachine)} `;
    md += `| ${mark(l.hasMetro)} `;
    md += `| ${l.score} `;
    md += `| [View](${l.url}) |\n`;
  });

  md += '\n## Legend\n';
  md += '- **Kitchen**: 厨房 required\n';
  md += '- **Fridge**: 冰箱 required\n';
  md += '- **Washer**: 洗衣机 bonus\n';
  md += '- **Metro**: Near metro station bonus\n';
  md += '- **Score**: Higher = better (price + amenities + location)\n';

  return md;
}

// Main execution
(async () => {
  if (process.env.LOGIN_MODE === 'true') {
    await loginMode();
  } else {
    await scrapXiaozhu();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error('❌ Scraper failed:', err);
  process.exitCode = 1;
});