Major updates: - December 2025 crisis documentation and separation agreement - Daily check system v2 with multiple card categories - Xiaozhu rental search tools and results - Exit plan documentation - Message drafts for family communication - Confluent moved to CONSTANT - Updated profiles and promises 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
400 lines
13 KiB
JavaScript
400 lines
13 KiB
JavaScript
const puppeteer = require('puppeteer');
|
||
const fs = require('fs');
|
||
|
||
/**
 * Xiaozhu Minsu Scraper - Client interface scraper
 * URL: https://minsu.xiaozhu.com/
 *
 * Search criteria:
 * - Xuhui District (徐汇区), Xujiahui area - near Jiaotong University
 * - 24 Dec 2025 → 22 Jan 2026 (29 nights)
 * - Budget: 3000-5000 RMB/month (ideally 3000-4000)
 * - Must-have: kitchen + fridge
 * - Nice-to-have: washing machine, close to the metro
 */

/**
 * Search parameters, budget limits and output locations for the scraper.
 *
 * Budgets are expressed per 30-day month; the two budget getters pro-rate
 * them to the length of the stay. NOTE: despite their names,
 * `dailyBudgetMax` and `dailyBudgetIdeal` are budgets for the WHOLE stay
 * (in RMB), not per-day rates. The names are kept because other parts of the
 * script read them.
 */
const CONFIG = {
  // Dates (ISO yyyy-mm-dd)
  checkIn: '2025-12-24',
  checkOut: '2026-01-22',

  // Location
  city: '上海',
  district: '徐汇区',
  keyword: '交通大学', // Near Jiaotong University

  // Budget (RMB per 30-day month)
  budgetMin: 0, // not read by any code visible in this file
  budgetMax: 5000,
  budgetIdeal: 4000,

  // Length of the stay in nights, derived from the dates above so the two can
  // never drift apart (29 for the dates configured here). Date-only ISO
  // strings are parsed as UTC, so daylight-saving changes cannot skew it.
  get days() {
    const msPerDay = 24 * 60 * 60 * 1000;
    return Math.round((Date.parse(this.checkOut) - Date.parse(this.checkIn)) / msPerDay);
  },

  // Maximum budget for the whole stay: monthly maximum pro-rated to `days`.
  get dailyBudgetMax() {
    return Math.ceil(this.budgetMax / 30 * this.days);
  },

  // Ideal budget for the whole stay: monthly ideal pro-rated to `days`.
  get dailyBudgetIdeal() {
    return Math.ceil(this.budgetIdeal / 30 * this.days);
  },

  // Equipment
  required: ['厨房', '冰箱'], // Kitchen, fridge
  bonus: ['洗衣机', '地铁'], // Washing machine, metro

  // Output
  outputFile: './xiaozhu_minsu_results.json',
  outputMarkdown: './xiaozhu_minsu_results.md',
  topN: 20
};

// Start-up summary. The last line prints the pro-rated budget for the whole
// stay, so it is labelled "Stay budget" rather than "Daily budget".
console.log('💰 Budget calculation:');
console.log(` Monthly budget: ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB`);
console.log(` Stay duration: ${CONFIG.days} days`);
console.log(` Stay budget: ${CONFIG.dailyBudgetIdeal}-${CONFIG.dailyBudgetMax} RMB total`);

/**
 * Loads previously exported browser cookies from `./xiaozhu_cookies.json`
 * (resolved against the current working directory).
 *
 * Best-effort: this function never throws. A missing file prints the setup
 * instructions; an unreadable file or invalid JSON prints the actual cause
 * instead of the misleading "No cookies found" message.
 *
 * @returns {Promise<*>} The parsed JSON content of the file (the caller
 *   expects an array of cookie objects), or `null` when the file is missing,
 *   unreadable or not valid JSON.
 */
async function loadCookies() {
  const cookieFile = './xiaozhu_cookies.json';

  let raw;
  try {
    raw = fs.readFileSync(cookieFile, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      // The file exists but could not be read (permissions, is a directory, ...).
      console.log(`⚠️ Could not read ${cookieFile}: ${err.message}`);
      return null;
    }
    console.log('⚠️ No cookies found!');
    console.log('\n📋 SETUP REQUIRED:');
    console.log('1. Go to https://minsu.xiaozhu.com/ in Firefox');
    console.log('2. Login if needed');
    console.log('3. Extract cookies using firefox_cookie_converter.js');
    console.log('4. Run this script again\n');
    return null;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    console.log(`⚠️ ${cookieFile} is not valid JSON: ${err.message}`);
    console.log('Re-export the cookies using firefox_cookie_converter.js');
    return null;
  }
}

/** Resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Extracts a price from free-form price text.
 *
 * Prefers the number attached to a currency marker ("¥150", "150元") over the
 * first number in the text, so "4.9分 ¥150" yields 150 rather than 4, and
 * removes thousands separators first, so "¥1,280" yields 1280 rather than 1.
 *
 * @param {*} text - Text content of a price element.
 * @returns {number|null} The parsed price, or `null` if none was found.
 */
function parsePriceText(text) {
  if (typeof text !== 'string') return null;

  const normalized = text.replace(/(\d)[,，](?=\d{3}(?!\d))/g, '$1');
  const match =
    normalized.match(/[¥￥]\s*(\d+(?:\.\d+)?)/) ||
    normalized.match(/(\d+(?:\.\d+)?)\s*元/) ||
    normalized.match(/(\d+(?:\.\d+)?)/);
  if (!match) return null;

  const value = Number.parseFloat(match[1]);
  return Number.isFinite(value) ? value : null;
}

/**
 * Tries a list of candidate search URLs, most specific first, and stops at
 * the first one that looks like a results page.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<boolean>} `true` if one of the URLs was accepted.
 */
async function openSearchPage(page) {
  const searchUrls = [
    `https://minsu.xiaozhu.com/search-shanghai-${CONFIG.district}/`,
    `https://minsu.xiaozhu.com/shanghai/${CONFIG.district}/`,
    `https://minsu.xiaozhu.com/search?city=shanghai&district=${CONFIG.district}`,
    'https://minsu.xiaozhu.com/shanghai/',
    'https://minsu.xiaozhu.com/'
  ];

  for (const url of searchUrls) {
    console.log(`\n🔍 Trying: ${url}`);

    try {
      await page.goto(url, { waitUntil: 'networkidle2', timeout: 20000 });

      const currentUrl = page.url();
      console.log(`✅ Loaded: ${currentUrl}`);

      // Give client-side rendering some time to finish.
      await sleep(3000);

      // Heuristic: more than two elements matching a listing-like selector.
      const hasListings = await page.evaluate(() => {
        const possibleSelectors = [
          '.room-list', '.house-list', '.list-item',
          '[class*="room"]', '[class*="house"]', '[class*="result"]'
        ];
        return possibleSelectors.some((sel) => document.querySelectorAll(sel).length > 2);
      });

      if (hasListings || currentUrl.includes('search') || currentUrl.includes('shanghai')) {
        return true;
      }
    } catch (err) {
      console.log(` ❌ Failed: ${err.message}`);
    }
  }

  return false;
}

/**
 * Types "<city> <district>" into the first search box found on the page and
 * presses Enter.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<boolean>} `true` if a search box was found and submitted.
 */
async function submitSearchQuery(page) {
  const searchInputSelectors = [
    'input[placeholder*="目的地"]',
    'input[placeholder*="搜索"]',
    'input[placeholder*="城市"]',
    'input.search-input',
    '#search-input',
    'input[type="text"]'
  ];

  for (const selector of searchInputSelectors) {
    try {
      const input = await page.$(selector);
      if (!input) continue;

      console.log(`✅ Found search input: ${selector}`);

      await input.click();
      await page.keyboard.type(`${CONFIG.city} ${CONFIG.district}`, { delay: 100 });
      await sleep(1500);

      await page.keyboard.press('Enter');
      await sleep(3000);

      return true;
    } catch (err) {
      // Best-effort: report why this candidate failed, then try the next one.
      console.log(` ⚠️ Could not use search input ${selector}: ${err.message}`);
    }
  }

  return false;
}

/**
 * Runs INSIDE the browser page (passed to `page.evaluate`), so it must be
 * self-contained and must not reference anything from this module.
 *
 * Collects raw data for up to 50 listing-like elements: title, price text,
 * location, link, image and equipment keyword flags. Price text is returned
 * unparsed; the Node side interprets it.
 *
 * @returns {Array<object>} Raw listings that have at least a title or a price text.
 */
function collectListingsInPage() {
  const containerSelectors = [
    '.pho_item', '.room_box', '.house-item', '.result-item',
    '[class*="room-item"]', '[class*="house-item"]',
    '[class*="card"]'
  ];
  const priceSelectors = ['[class*="price"]', '[class*="money"]'];
  const hasDigit = (text) => /\d/.test(text);

  // First selector yielding a plausible number of items wins.
  let listingElements = [];
  for (const selector of containerSelectors) {
    const elements = document.querySelectorAll(selector);
    if (elements.length > 2 && elements.length < 200) {
      listingElements = Array.from(elements);
      break;
    }
  }

  const results = [];

  listingElements.slice(0, 50).forEach((item, index) => {
    const fullText = item.textContent;
    const listing = {
      index: index + 1,
      raw: fullText.substring(0, 300)
    };

    const titleEl = item.querySelector('h2, h3, h4, [class*="title"], [class*="name"]');
    if (titleEl) {
      listing.title = titleEl.textContent.trim();
    }

    // Price: class-based selectors first, then any <span> that carries a
    // currency marker. (`span:contains("¥")` is jQuery syntax, not CSS, and
    // makes querySelector throw, so the text is scanned explicitly.)
    let priceEl = priceSelectors
      .map((sel) => item.querySelector(sel))
      .find((el) => el && hasDigit(el.textContent));
    if (!priceEl) {
      priceEl = Array.from(item.querySelectorAll('span'))
        .find((span) => /[¥￥元]/.test(span.textContent) && hasDigit(span.textContent));
    }
    if (priceEl) {
      listing.priceText = priceEl.textContent;
    }

    const locationEl = item.querySelector('[class*="location"], [class*="address"], [class*="area"]');
    if (locationEl) {
      listing.location = locationEl.textContent.trim();
    }

    const linkEl = item.querySelector('a');
    if (linkEl) {
      listing.url = linkEl.href;
    }

    const imgEl = item.querySelector('img');
    if (imgEl) {
      listing.image = imgEl.src;
    }

    // Equipment keywords anywhere in the item's text.
    listing.hasKitchen = fullText.includes('厨房') || fullText.includes('kitchen');
    listing.hasFridge = fullText.includes('冰箱') || fullText.includes('fridge');
    listing.hasWashingMachine = fullText.includes('洗衣机') || fullText.includes('washing');
    listing.hasMetro = fullText.includes('地铁') || fullText.includes('metro');

    if (listing.title || listing.priceText) {
      results.push(listing);
    }
  });

  return results;
}

/**
 * Extracts the raw listings from the page and attaches the parsed price
 * (`priceDaily`) to every listing whose price text could be interpreted.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<Array<object>>} Listings, in page order.
 */
async function extractListings(page) {
  const rawListings = await page.evaluate(collectListingsInPage);

  return rawListings.map((listing) => {
    const priceDaily = parsePriceText(listing.priceText);
    return priceDaily === null ? listing : { ...listing, priceDaily };
  });
}

/**
 * Prices, scores, filters and sorts the listings.
 *
 * Keeps listings that have a positive price, a kitchen and a fridge, and
 * whose total for the stay fits the maximum stay budget; returns the
 * `CONFIG.topN` best by score. Input objects are not mutated.
 *
 * @param {Array<object>} listings - Listings with `priceDaily` and equipment flags.
 * @returns {Array<object>} Copies with `priceTotal`, `priceMonthly` and `score` added.
 */
function rankListings(listings) {
  // CONFIG.dailyBudget* hold budgets for the whole stay, despite the names.
  const stayBudgetIdeal = CONFIG.dailyBudgetIdeal;
  const stayBudgetMax = CONFIG.dailyBudgetMax;

  return listings
    .filter((l) => l.priceDaily > 0)
    .map((l) => {
      const priceTotal = Math.ceil(l.priceDaily * CONFIG.days);
      const priceMonthly = Math.ceil(l.priceDaily * 30);

      let score = 0;

      // Price: reward being under the ideal budget, penalise going over it.
      if (priceTotal <= stayBudgetIdeal) {
        score += (stayBudgetIdeal - priceTotal) / 100;
      } else if (priceTotal <= stayBudgetMax) {
        score -= (priceTotal - stayBudgetIdeal) / 50;
      } else {
        score -= 100; // Over budget penalty
      }

      // Equipment bonuses
      if (l.hasKitchen) score += 20;
      if (l.hasFridge) score += 15;
      if (l.hasWashingMachine) score += 10;
      if (l.hasMetro) score += 15;

      // Location bonuses
      if (l.location && l.location.includes(CONFIG.district)) score += 20;
      if (l.title && l.title.includes(CONFIG.keyword)) score += 10;

      return { ...l, priceTotal, priceMonthly, score: Math.round(score * 10) / 10 };
    })
    .filter((l) => l.hasKitchen && l.hasFridge) // Must-have requirements
    .filter((l) => l.priceTotal <= stayBudgetMax) // Budget filter
    .sort((a, b) => b.score - a.score)
    .slice(0, CONFIG.topN);
}

/** Prints the five best listings to the console. */
function printTopListings(ranked) {
  const mark = (flag) => (flag ? '✓' : '✗');

  console.log('\n🏆 TOP 5 OPTIONS:\n');
  ranked.slice(0, 5).forEach((l, i) => {
    console.log(`${i + 1}. ${l.title || 'No title'}`);
    console.log(` 💰 ${l.priceDaily} RMB/day × ${CONFIG.days} days = ${l.priceTotal} RMB total (~${l.priceMonthly} RMB/month)`);
    if (l.location) console.log(` 📍 ${l.location}`);
    console.log(` ✓ Kitchen: ${mark(l.hasKitchen)} | Fridge: ${mark(l.hasFridge)} | Washer: ${mark(l.hasWashingMachine)} | Metro: ${mark(l.hasMetro)}`);
    console.log(` ⭐ Score: ${l.score}`);
    if (l.url) console.log(` 🔗 ${l.url}`);
    console.log('');
  });
}

/**
 * Entry point: opens minsu.xiaozhu.com in a headless browser, tries to reach
 * a search-results page for the configured district, extracts the listings,
 * ranks them and writes the results as JSON and Markdown (paths from CONFIG).
 * Screenshots, and the page HTML when nothing could be extracted, are saved
 * to the working directory for manual inspection.
 *
 * Errors raised after the browser has been launched are logged, not
 * rethrown, and the browser is always closed.
 *
 * @returns {Promise<void>}
 */
async function scrapMinsuXiaozhu() {
  const cookies = await loadCookies();

  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: { width: 1920, height: 1080 },
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  // Everything that uses the browser lives inside the try block so that a
  // failure (e.g. malformed cookies) cannot leave the browser process running.
  try {
    const page = await browser.newPage();

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    // Load cookies if available
    if (Array.isArray(cookies) && cookies.length > 0) {
      await page.setCookie(...cookies);
      console.log(`🍪 Loaded ${cookies.length} cookies`);
    }

    const pageLoaded = await openSearchPage(page);
    if (!pageLoaded) {
      console.log('\n❌ Could not load search page. Trying homepage navigation...');
      await page.goto('https://minsu.xiaozhu.com/', { waitUntil: 'networkidle2' });
    }

    await page.screenshot({ path: './xiaozhu_minsu_page.png', fullPage: true });
    console.log('📸 Screenshot saved: xiaozhu_minsu_page.png');

    console.log('\n🔍 Looking for search input...');
    const searchFound = await submitSearchQuery(page);
    if (!searchFound) {
      console.log('⚠️ No search input found, will extract from current page');
    }

    console.log('\n📊 Extracting listings...');
    const listings = await extractListings(page);
    console.log(`✅ Extracted ${listings.length} listings`);

    if (listings.length === 0) {
      console.log('\n❌ No listings found. Saving page HTML for manual inspection...');
      const html = await page.content();
      fs.writeFileSync('./xiaozhu_minsu_page.html', html);
      console.log('💾 HTML saved to: xiaozhu_minsu_page.html');
    } else {
      const filtered = rankListings(listings);
      console.log(`\n✅ Filtered to ${filtered.length} suitable options`);

      fs.writeFileSync(CONFIG.outputFile, JSON.stringify(filtered, null, 2));
      console.log(`💾 Results saved to: ${CONFIG.outputFile}`);

      const markdown = generateMarkdown(filtered);
      fs.writeFileSync(CONFIG.outputMarkdown, markdown);
      console.log(`📝 Markdown saved to: ${CONFIG.outputMarkdown}`);

      printTopListings(filtered);
    }

    await page.screenshot({ path: './xiaozhu_minsu_final.png', fullPage: true });
    console.log('📸 Final screenshot: xiaozhu_minsu_final.png');
  } catch (err) {
    console.error('❌ Error:', err.message);
    console.error(err.stack);
  } finally {
    await browser.close();
  }
}

/**
 * Renders the ranked listings as a Markdown report: a header with the search
 * parameters (read from CONFIG), one table row per listing and a legend.
 *
 * Titles are reduced to a single line, truncated to 40 characters and have
 * `|` escaped, so scraped text cannot break the table layout.
 *
 * @param {Array<object>} listings - Listings with `title`, `priceDaily`,
 *   `priceTotal`, the `has*` equipment flags, `score` and optionally `url`.
 * @returns {string} The Markdown document, ending with a newline.
 */
function generateMarkdown(listings) {
  const mark = (flag) => (flag ? '✓' : '✗');

  const titleCell = (title) => {
    const singleLine = (title || 'No title').replace(/\s+/g, ' ').trim();
    // Truncate by code point so emoji / rare CJK characters are never cut in half.
    const truncated = Array.from(singleLine).slice(0, 40).join('');
    return truncated.replace(/\|/g, '\\|');
  };

  const header = [
    '# Xiaozhu Minsu Search Results - Xujiahui District',
    '',
    `**Search Date:** ${new Date().toLocaleDateString()}`,
    `**Check-in:** ${CONFIG.checkIn}`,
    `**Check-out:** ${CONFIG.checkOut}`,
    `**Duration:** ${CONFIG.days} days`,
    // CONFIG.dailyBudget* hold the budget for the whole stay, hence the label.
    `**Stay Budget:** ${CONFIG.dailyBudgetIdeal}-${CONFIG.dailyBudgetMax} RMB total`,
    `**Monthly Equivalent:** ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB/month`,
    '',
    '| # | Title | Daily | Total | Kitchen | Fridge | Washer | Metro | Score | Link |',
    '|---|-------|-------|-------|---------|--------|--------|-------|-------|------|'
  ];

  const rows = listings.map((l, i) => {
    const cells = [
      i + 1,
      titleCell(l.title),
      `¥${l.priceDaily}`,
      `¥${l.priceTotal}`,
      mark(l.hasKitchen),
      mark(l.hasFridge),
      mark(l.hasWashingMachine),
      mark(l.hasMetro),
      l.score,
      l.url ? `[View](${l.url})` : '-'
    ];
    return `| ${cells.join(' | ')} |`;
  });

  const legend = [
    '',
    '## Legend',
    '',
    '- **Daily**: Price per day (RMB)',
    `- **Total**: Total price for ${CONFIG.days} days stay`,
    '- **Kitchen**: 厨房 (required)',
    '- **Fridge**: 冰箱 (required)',
    '- **Washer**: 洗衣机 (bonus)',
    '- **Metro**: Near metro station (bonus)',
    '- **Score**: Higher = better (price + amenities + location)'
  ];

  return `${[...header, ...rows, ...legend].join('\n')}\n`;
}

// Run the scraper. Anything it does not handle itself (for example the
// browser failing to launch) is reported and turned into a non-zero exit
// code, so shell scripts and schedulers can detect the failure.
scrapMinsuXiaozhu().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});