const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Xiaozhu Minsu Scraper - Client interface scraper
 * URL: https://minsu.xiaozhu.com/
 *
 * Criteria:
 * - Xujiahui District (徐汇区) - near Jiaotong University
 * - 24 Dec 2025 → 22 Jan 2026 (29 days)
 * - Budget: 3000-5000 RMB/month (ideally 3000-4000)
 * - Must-have: kitchen + fridge
 * - Nice to have: washing machine, close to metro
 */

const CONFIG = {
  // Dates
  checkIn: '2025-12-24',
  checkOut: '2026-01-22',

  // Location
  city: '上海',
  district: '徐汇区',
  keyword: '交通大学', // Near Jiaotong University

  // Budget (RMB/month)
  budgetMin: 0,
  budgetMax: 5000,
  budgetIdeal: 4000,

  // Length of the stay, in days
  days: 29,

  // NOTE: despite the "daily" prefix, these two getters return the budget for
  // the WHOLE stay (monthly budget pro-rated over `days`). The names are kept
  // because the rest of the script compares them against `priceTotal`.
  get dailyBudgetMax() {
    return Math.ceil(this.budgetMax / 30 * this.days);
  },
  get dailyBudgetIdeal() {
    return Math.ceil(this.budgetIdeal / 30 * this.days);
  },

  // Equipment
  required: ['厨房', '冰箱'], // Kitchen, fridge
  bonus: ['洗衣机', '地铁'], // Washing machine, metro

  // Output
  outputFile: './xiaozhu_minsu_results.json',
  outputMarkdown: './xiaozhu_minsu_results.md',
  topN: 20
};

console.log('💰 Budget calculation:');
console.log(` Monthly budget: ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB`);
console.log(` Stay duration: ${CONFIG.days} days`);
// Label fixed: the figures are totals for the stay, not per-day amounts.
console.log(` Stay budget: ${CONFIG.dailyBudgetIdeal}-${CONFIG.dailyBudgetMax} RMB total`);

/** Resolve after `ms` milliseconds; used to give the site time to render. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Read previously exported browser cookies from ./xiaozhu_cookies.json.
 *
 * @returns {Promise<Array<object>|null>} The parsed cookie array, or null when
 *   the file is missing, unreadable, or does not contain a JSON array.
 */
async function loadCookies() {
  try {
    const raw = fs.readFileSync('./xiaozhu_cookies.json', 'utf8');
    const cookies = JSON.parse(raw);
    if (!Array.isArray(cookies)) {
      console.log('⚠️ xiaozhu_cookies.json does not contain a cookie array; ignoring it');
      return null;
    }
    return cookies;
  } catch (err) {
    if (err.code === 'ENOENT') {
      console.log('⚠️ No cookies found!');
      console.log('\n📋 SETUP REQUIRED:');
      console.log('1. Go to https://minsu.xiaozhu.com/ in Firefox');
      console.log('2. Login if needed');
      console.log('3. Extract cookies using firefox_cookie_converter.js');
      console.log('4. Run this script again\n');
    } else {
      // A present-but-broken file is a different problem from a missing one.
      console.log(`⚠️ Could not read xiaozhu_cookies.json: ${err.message}`);
    }
    return null;
  }
}

/**
 * Try a series of candidate search URLs until one looks like a results page.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<boolean>} true when one of the URLs loaded and either shows
 *   listing-like elements or its final URL contains "search"/"shanghai".
 */
async function openSearchPage(page) {
  const searchUrls = [
    `https://minsu.xiaozhu.com/search-shanghai-${CONFIG.district}/`,
    `https://minsu.xiaozhu.com/shanghai/${CONFIG.district}/`,
    `https://minsu.xiaozhu.com/search?city=shanghai&district=${CONFIG.district}`,
    'https://minsu.xiaozhu.com/shanghai/',
    'https://minsu.xiaozhu.com/'
  ];

  for (const url of searchUrls) {
    console.log(`\n🔍 Trying: ${url}`);
    try {
      await page.goto(url, { waitUntil: 'networkidle2', timeout: 20000 });
      const currentUrl = page.url();
      console.log(`✅ Loaded: ${currentUrl}`);

      // Wait for content
      await sleep(3000);

      // Check if we found listings
      const hasListings = await page.evaluate(() => {
        const possibleSelectors = [
          '.room-list',
          '.house-list',
          '.list-item',
          '[class*="room"]',
          '[class*="house"]',
          '[class*="result"]'
        ];
        return possibleSelectors.some((sel) => document.querySelectorAll(sel).length > 2);
      });

      if (hasListings || currentUrl.includes('search') || currentUrl.includes('shanghai')) {
        return true;
      }
    } catch (err) {
      console.log(` ❌ Failed: ${err.message}`);
    }
  }
  return false;
}

/**
 * Locate a search box on the current page, type the city and district, and
 * press Enter.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<boolean>} true when a search input was found and submitted.
 */
async function submitSearch(page) {
  const searchInputSelectors = [
    'input[placeholder*="目的地"]',
    'input[placeholder*="搜索"]',
    'input[placeholder*="城市"]',
    'input.search-input',
    '#search-input',
    'input[type="text"]'
  ];

  for (const selector of searchInputSelectors) {
    try {
      const input = await page.$(selector);
      if (!input) continue;

      console.log(`✅ Found search input: ${selector}`);

      // Type search query
      await input.click();
      await page.keyboard.type(`${CONFIG.city} ${CONFIG.district}`, { delay: 100 });
      await sleep(1500);

      // Try to submit
      await page.keyboard.press('Enter');
      await sleep(3000);
      return true;
    } catch (err) {
      // Best effort: report why this candidate failed, then try the next one.
      console.log(`⚠️ Search input ${selector} unusable: ${err.message}`);
    }
  }
  return false;
}

/**
 * Scrape listing cards from the current page.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<Array<object>>} Raw listings (title, priceDaily, location,
 *   url, image and has* amenity flags where they could be detected).
 */
async function extractListings(page) {
  // The callback runs inside the browser, so it must be self-contained.
  const { selector, matchedCount, results } = await page.evaluate(() => {
    // Possible selectors for listing items
    const containerSelectors = [
      '.pho_item',
      '.room_box',
      '.house-item',
      '.result-item',
      '[class*="room-item"]',
      '[class*="house-item"]',
      '[class*="card"]'
    ];

    let matchedSelector = null;
    let matched = 0;
    let listingElements = [];
    for (const candidate of containerSelectors) {
      const elements = document.querySelectorAll(candidate);
      if (elements.length > 2 && elements.length < 200) {
        matchedSelector = candidate;
        matched = elements.length;
        listingElements = Array.from(elements).slice(0, 50); // Limit to first 50
        break;
      }
    }

    const found = [];
    listingElements.forEach((item, index) => {
      const fullText = item.textContent;
      const listing = {
        index: index + 1,
        raw: fullText.substring(0, 300)
      };

      // Extract title
      const titleEl = item.querySelector('h2, h3, h4, [class*="title"], [class*="name"]');
      if (titleEl) {
        listing.title = titleEl.textContent.trim();
      }

      // Extract price. `:contains()` is not valid CSS (querySelector throws on
      // it), so the currency-symbol fallback scans <span> text instead.
      const currencySpan = Array.from(item.querySelectorAll('span')).find(
        (span) => /[¥￥元]/.test(span.textContent) && /\d/.test(span.textContent)
      );
      const priceCandidates = [
        item.querySelector('[class*="price"]'),
        item.querySelector('[class*="money"]'),
        currencySpan
      ];
      for (const priceEl of priceCandidates) {
        if (!priceEl) continue;
        const priceText = priceEl.textContent;
        // Accept thousands separators so "1,200" is read as 1200, not 1.
        const priceMatch = priceText.match(/\d[\d,]*/);
        if (priceMatch) {
          listing.priceDaily = Number.parseInt(priceMatch[0].replace(/,/g, ''), 10);
          listing.priceText = priceText;
          break;
        }
      }

      // Extract location
      const locationEl = item.querySelector('[class*="location"], [class*="address"], [class*="area"]');
      if (locationEl) {
        listing.location = locationEl.textContent.trim();
      }

      // Extract URL
      const linkEl = item.querySelector('a');
      if (linkEl) {
        listing.url = linkEl.href;
      }

      // Extract image
      const imgEl = item.querySelector('img');
      if (imgEl) {
        listing.image = imgEl.src;
      }

      // Check for equipment keywords (English ones matched case-insensitively)
      const lowerText = fullText.toLowerCase();
      listing.hasKitchen = fullText.includes('厨房') || lowerText.includes('kitchen');
      listing.hasFridge = fullText.includes('冰箱') || lowerText.includes('fridge');
      listing.hasWashingMachine = fullText.includes('洗衣机') || lowerText.includes('washing');
      listing.hasMetro = fullText.includes('地铁') || lowerText.includes('metro');

      if (listing.title || listing.priceDaily) {
        found.push(listing);
      }
    });

    return { selector: matchedSelector, matchedCount: matched, results: found };
  });

  // Logged from Node: a console.log inside evaluate() only reaches the browser console.
  if (selector) {
    console.log(`Found ${matchedCount} items with selector: ${selector}`);
  }
  return results;
}

/**
 * Price, score, filter and sort raw listings against CONFIG.
 *
 * @param {Array<object>} listings - Output of extractListings.
 * @returns {Array<object>} At most CONFIG.topN listings that have a kitchen and
 *   a fridge and fit the stay budget, best score first. Inputs are not mutated.
 */
function rankListings(listings) {
  return listings
    .filter((l) => l.priceDaily > 0)
    .map((l) => {
      // Calculate total price for stay duration
      const priceTotal = l.priceDaily * CONFIG.days;
      const priceMonthly = Math.ceil(l.priceDaily * 30);

      let score = 0;

      // Price score
      if (priceTotal <= CONFIG.dailyBudgetIdeal) {
        score += (CONFIG.dailyBudgetIdeal - priceTotal) / 100;
      } else if (priceTotal <= CONFIG.dailyBudgetMax) {
        score -= (priceTotal - CONFIG.dailyBudgetIdeal) / 50;
      } else {
        score -= 100; // Over budget penalty
      }

      // Equipment bonuses
      if (l.hasKitchen) score += 20;
      if (l.hasFridge) score += 15;
      if (l.hasWashingMachine) score += 10;
      if (l.hasMetro) score += 15;

      // Location bonus (if contains keyword)
      if (l.location && l.location.includes(CONFIG.district)) score += 20;
      if (l.title && l.title.includes(CONFIG.keyword)) score += 10;

      return { ...l, priceTotal, priceMonthly, score: Math.round(score * 10) / 10 };
    })
    .filter((l) => l.hasKitchen && l.hasFridge) // Must-have requirements
    .filter((l) => l.priceTotal <= CONFIG.dailyBudgetMax) // Budget filter
    .sort((a, b) => b.score - a.score)
    .slice(0, CONFIG.topN);
}

/** Print the five best listings to the console. */
function printTopListings(listings) {
  const mark = (flag) => (flag ? '✓' : '✗');

  console.log('\n🏆 TOP 5 OPTIONS:\n');
  listings.slice(0, 5).forEach((l, i) => {
    console.log(`${i + 1}. ${l.title || 'No title'}`);
    console.log(` 💰 ${l.priceDaily} RMB/day × ${CONFIG.days} days = ${l.priceTotal} RMB total (~${l.priceMonthly} RMB/month)`);
    if (l.location) console.log(` 📍 ${l.location}`);
    console.log(` ✓ Kitchen: ${mark(l.hasKitchen)} | Fridge: ${mark(l.hasFridge)} | Washer: ${mark(l.hasWashingMachine)} | Metro: ${mark(l.hasMetro)}`);
    console.log(` ⭐ Score: ${l.score}`);
    if (l.url) console.log(` 🔗 ${l.url}`);
    console.log('');
  });
}

/**
 * Entry point: open the site, search, scrape listings, rank them and write the
 * JSON/Markdown reports plus debugging screenshots.
 *
 * Errors during scraping are logged rather than rethrown; the browser is
 * always closed.
 *
 * @returns {Promise<void>}
 */
async function scrapMinsuXiaozhu() {
  const cookies = await loadCookies();

  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: { width: 1920, height: 1080 },
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  // Everything after launch lives inside try/finally so that a failure during
  // page setup (e.g. malformed cookies) cannot leave the browser running.
  try {
    const page = await browser.newPage();

    // Set user agent
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    // Load cookies if available
    if (cookies && cookies.length > 0) {
      await page.setCookie(...cookies);
      console.log(`🍪 Loaded ${cookies.length} cookies`);
    }

    // Try different search URL patterns
    const pageLoaded = await openSearchPage(page);
    if (!pageLoaded) {
      console.log('\n❌ Could not load search page. Trying homepage navigation...');
      await page.goto('https://minsu.xiaozhu.com/', { waitUntil: 'networkidle2' });
    }

    // Take screenshot
    await page.screenshot({ path: './xiaozhu_minsu_page.png', fullPage: true });
    console.log('📸 Screenshot saved: xiaozhu_minsu_page.png');

    // Try to find and use search functionality
    console.log('\n🔍 Looking for search input...');
    const searchFound = await submitSearch(page);
    if (!searchFound) {
      console.log('⚠️ No search input found, will extract from current page');
    }

    // Extract listings
    console.log('\n📊 Extracting listings...');
    const listings = await extractListings(page);
    console.log(`✅ Extracted ${listings.length} listings`);

    if (listings.length === 0) {
      console.log('\n❌ No listings found. Saving page HTML for manual inspection...');
      const html = await page.content();
      fs.writeFileSync('./xiaozhu_minsu_page.html', html);
      console.log('💾 HTML saved to: xiaozhu_minsu_page.html');
    } else {
      const filtered = rankListings(listings);
      console.log(`\n✅ Filtered to ${filtered.length} suitable options`);

      // Save results
      fs.writeFileSync(CONFIG.outputFile, JSON.stringify(filtered, null, 2));
      console.log(`💾 Results saved to: ${CONFIG.outputFile}`);

      // Generate markdown
      const markdown = generateMarkdown(filtered);
      fs.writeFileSync(CONFIG.outputMarkdown, markdown);
      console.log(`📝 Markdown saved to: ${CONFIG.outputMarkdown}`);

      printTopListings(filtered);
    }

    // Save final screenshot
    await page.screenshot({ path: './xiaozhu_minsu_final.png', fullPage: true });
    console.log('📸 Final screenshot: xiaozhu_minsu_final.png');
  } catch (err) {
    console.error('❌ Error:', err.message);
    console.error(err.stack);
  } finally {
    await browser.close();
  }
}

/**
 * Render ranked listings as a Markdown report.
 *
 * @param {Array<object>} listings - Ranked listings (see rankListings).
 * @returns {string} Markdown document with a summary header, a results table
 *   and a legend.
 */
function generateMarkdown(listings) {
  const mark = (flag) => (flag ? '✓' : '✗');

  // Scraped titles may contain newlines or pipes, either of which would break
  // the table row. Truncate by code point so a surrogate pair is never split.
  const titleCell = (title) =>
    [...(title || 'No title').replace(/\s+/g, ' ')]
      .slice(0, 40)
      .join('')
      .replace(/\|/g, '\\|');

  // Percent-encode the few characters that would terminate the link target or the cell.
  const linkEscapes = { '|': '%7C', '(': '%28', ')': '%29', ' ': '%20' };
  const linkTarget = (url) => url.replace(/[|() ]/g, (ch) => linkEscapes[ch]);

  let md = '# Xiaozhu Minsu Search Results - Xujiahui District\n\n';
  md += `**Search Date:** ${new Date().toLocaleDateString()}\n`;
  md += `**Check-in:** ${CONFIG.checkIn}\n`;
  md += `**Check-out:** ${CONFIG.checkOut}\n`;
  md += `**Duration:** ${CONFIG.days} days\n`;
  // Label fixed: these figures are totals for the stay, not per-day amounts.
  md += `**Stay Budget:** ${CONFIG.dailyBudgetIdeal}-${CONFIG.dailyBudgetMax} RMB total\n`;
  md += `**Monthly Equivalent:** ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB/month\n\n`;

  md += '| # | Title | Daily | Total | Kitchen | Fridge | Washer | Metro | Score | Link |\n';
  md += '|---|-------|-------|-------|---------|--------|--------|-------|-------|------|\n';

  listings.forEach((l, i) => {
    const cells = [
      i + 1,
      titleCell(l.title),
      `¥${l.priceDaily}`,
      `¥${l.priceTotal}`,
      mark(l.hasKitchen),
      mark(l.hasFridge),
      mark(l.hasWashingMachine),
      mark(l.hasMetro),
      l.score,
      l.url ? `[View](${linkTarget(l.url)})` : '-'
    ];
    md += `| ${cells.join(' | ')} |\n`;
  });

  md += '\n## Legend\n\n';
  md += '- **Daily**: Price per day (RMB)\n';
  md += `- **Total**: Total price for ${CONFIG.days} days stay\n`;
  md += '- **Kitchen**: 厨房 (required)\n';
  md += '- **Fridge**: 冰箱 (required)\n';
  md += '- **Washer**: 洗衣机 (bonus)\n';
  md += '- **Metro**: Near metro station (bonus)\n';
  md += '- **Score**: Higher = better (price + amenities + location)\n';

  return md;
}

// Run
scrapMinsuXiaozhu().catch(console.error);