Major updates: - December 2025 crisis documentation and separation agreement - Daily check system v2 with multiple card categories - Xiaozhu rental search tools and results - Exit plan documentation - Message drafts for family communication - Confluent moved to CONSTANT - Updated profiles and promises 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
269 lines
8.6 KiB
JavaScript
269 lines
8.6 KiB
JavaScript
const puppeteer = require('puppeteer');
|
|
const fs = require('fs');
|
|
|
|
/**
|
|
* Xiaozhu Scraper - Location appart près de Jiaoda Xujiahui Campus
|
|
* Dates: 24 dec 2025 → 22 jan 2026
|
|
* Budget: 3000-5000 RMB/mois (idéal 3000-4000)
|
|
* Critères: Cuisine + frigo requis, machine à laver bonus
|
|
*/
|
|
|
|
/**
 * Search parameters, filter thresholds and output locations for the scrape.
 * Budget figures are in RMB per month.
 */
const CONFIG = {
  // Rental dates
  checkIn: '2025-12-24',
  checkOut: '2026-01-22',

  // Budget (RMB/month)
  budgetMin: 0,
  budgetMax: 5000,
  budgetIdeal: 4000,

  // Location (Xujiahui campus)
  targetDistrict: '徐汇区', // Xuhui District
  targetMetroLines: [1, 7, 9, 10, 11], // Reachable lines
  maxMetroTime: 25, // max minutes

  // Required amenities
  required: ['kitchen', 'fridge'],
  bonus: ['washingMachine', 'metro'],

  // Output
  outputFile: './xiaozhu_results.json',
  outputMarkdown: './xiaozhu_results.md',
  topN: 20
};
|
|
|
|
// Metro stations near the campus (Xujiahui).
// Each entry lists the station name, the metro lines serving it, and a
// `minutes` figure (presumably the estimated travel time to campus — confirm).
// Entries are ordered by ascending `minutes`.
const PREFERRED_STATIONS = [
  { name: '交通大学', lines: [10, 11], minutes: 0 },
  { name: '徐家汇', lines: [1, 9, 11], minutes: 5 },
  { name: '衡山路', lines: [1], minutes: 10 },
  { name: '常熟路', lines: [1, 7], minutes: 10 },
  { name: '上海体育馆', lines: [1, 4], minutes: 15 },
  { name: '龙华', lines: [11, 12], minutes: 15 },
  { name: '漕河泾开发区', lines: [9], minutes: 20 },
  { name: '七宝', lines: [9], minutes: 25 }
];
|
|
|
|
/**
 * Load the saved session cookies from ./xiaozhu_cookies.json.
 *
 * Never throws: every failure is reported on the console and signalled by a
 * null return, so the caller can tell the user to log in again.
 *
 * @returns {Promise<Array<object>|null>} The parsed cookie array, or null when
 *   the file is missing, unreadable, not valid JSON, or not a JSON array.
 */
async function loadCookies() {
  const cookieFile = './xiaozhu_cookies.json';

  let raw;
  try {
    raw = fs.readFileSync(cookieFile, 'utf8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      console.log('❌ Cookies not found. Please login first and save cookies.');
      console.log('Instructions:');
      console.log('1. Run this script with LOGIN_MODE=true');
      console.log('2. Login manually when browser opens');
      console.log('3. Press Enter when done to save cookies');
    } else {
      // The file exists but cannot be read (permissions, is a directory, ...):
      // say so instead of claiming it is missing.
      console.log(`❌ Could not read ${cookieFile}: ${err.message}`);
    }
    return null;
  }

  let cookies;
  try {
    cookies = JSON.parse(raw);
  } catch (err) {
    console.log(`❌ ${cookieFile} is not valid JSON: ${err.message}`);
    console.log('Delete it and login again with LOGIN_MODE=true to regenerate it.');
    return null;
  }

  // The caller spreads the result into page.setCookie(...), so anything other
  // than an array would fail there with a confusing TypeError.
  if (!Array.isArray(cookies)) {
    console.log(`❌ ${cookieFile} does not contain a JSON array of cookies.`);
    console.log('Delete it and login again with LOGIN_MODE=true to regenerate it.');
    return null;
  }

  return cookies;
}
|
|
|
|
/**
 * Persist the cookies of the given page to ./xiaozhu_cookies.json.
 *
 * The file holds live session cookies, so it is restricted to owner
 * read/write (0600) where the platform honours POSIX permissions.
 *
 * @param {object} page - Object exposing an async `cookies()` method
 *   (a Puppeteer page in this script).
 * @returns {Promise<void>}
 */
async function saveCookies(page) {
  const cookieFile = './xiaozhu_cookies.json';
  const cookies = await page.cookies();

  // `mode` only applies when the file is created; chmod also covers a file
  // left over from an earlier run with looser permissions.
  fs.writeFileSync(cookieFile, JSON.stringify(cookies, null, 2), { mode: 0o600 });
  fs.chmodSync(cookieFile, 0o600);

  console.log('✅ Cookies saved to xiaozhu_cookies.json');
}
|
|
|
|
/**
 * Interactive login flow: opens a visible browser on xiaozhu.com, waits for
 * the user to log in and press Enter in the terminal, then saves the session
 * cookies via saveCookies().
 *
 * The browser is closed even when navigation or saving fails.
 *
 * @returns {Promise<void>}
 */
async function loginMode() {
  console.log('🔐 LOGIN MODE - Manual login required');
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://www.xiaozhu.com');

    console.log('📝 Please login manually in the browser...');
    console.log('⏸️ Press Enter when you are logged in');

    // Wait for user input
    await new Promise((resolve) => {
      process.stdin.once('data', resolve);
    });
    // Attaching a 'data' listener switched stdin to flowing mode; pause it
    // again so the open stream does not keep the process alive afterwards.
    process.stdin.pause();

    await saveCookies(page);
  } finally {
    await browser.close();
  }

  console.log('✅ Login complete! Run the script again without LOGIN_MODE');
}
|
|
|
|
/**
 * Compute the ranking score of one listing (higher is better).
 *
 * - Price: at or below CONFIG.budgetIdeal earns 1 point per 100 RMB saved;
 *   above it costs 1 point per 50 RMB over.
 * - Amenities: +10 for a washing machine, +15 for a metro mention.
 * - Metro time: -0.5 per estimated minute.
 *
 * NOTE(review): nothing in this file updates `estimatedMetroTime` from its
 * 999 default, so the last term currently subtracts a constant 499.5 from
 * every score. Ranking order is unaffected, but absolute scores are.
 *
 * @param {object} listing - Listing produced by the page extraction step.
 * @returns {number} Score rounded to one decimal place.
 */
function scoreListing(listing) {
  let score = 0;

  if (listing.priceNum <= CONFIG.budgetIdeal) {
    score += (CONFIG.budgetIdeal - listing.priceNum) / 100; // Cheaper = better
  } else {
    score -= (listing.priceNum - CONFIG.budgetIdeal) / 50; // Over ideal = penalty
  }

  if (listing.hasWashingMachine) score += 10;
  if (listing.hasMetro) score += 15;

  score -= listing.estimatedMetroTime * 0.5;

  return Math.round(score * 10) / 10;
}

/**
 * Scrape the Xiaozhu search page for the configured district, keep listings
 * within budget that mention both a kitchen and a fridge, score and rank them,
 * write the top CONFIG.topN to the JSON and Markdown output files, and print
 * the top 5 to the console.
 *
 * Requires cookies saved beforehand by the login mode; returns early with a
 * hint when none can be loaded. The browser is closed even if a step throws.
 *
 * @returns {Promise<void>}
 */
async function scrapXiaozhu() {
  const cookies = await loadCookies();
  if (!cookies) {
    console.log('Run: LOGIN_MODE=true node xiaozhu_scraper.js');
    return;
  }

  const browser = await puppeteer.launch({
    headless: false, // Set to true for production
    defaultViewport: null
  });

  try {
    const page = await browser.newPage();

    // Set cookies
    await page.setCookie(...cookies);

    // Navigate to search page
    // Note: URL structure needs to be determined based on actual Xiaozhu website
    // This is a placeholder - we'll need to inspect the actual site
    const searchUrl = `https://www.xiaozhu.com/search-shanghai-${CONFIG.targetDistrict}/`;

    console.log(`🔍 Searching: ${searchUrl}`);
    await page.goto(searchUrl, { waitUntil: 'networkidle2' });

    // Wait for listings to load
    // Selector needs to be determined by inspecting the actual page.
    // A timeout is deliberately non-fatal: extraction below then yields
    // whatever (possibly nothing) is on the page.
    await page.waitForSelector('.result_list', { timeout: 10000 }).catch(() => {
      console.log('⚠️ Timeout waiting for listings. Page structure might have changed.');
    });

    // Extract listings. The callback runs inside the browser page, so it must
    // be self-contained and cannot reference anything from this module.
    const listings = await page.evaluate(() => {
      const results = [];
      const textOf = (root, selector) =>
        root.querySelector(selector)?.textContent?.trim() || '';

      // This selector needs to be updated based on actual Xiaozhu HTML structure
      const cards = document.querySelectorAll('.result_list .result_item');

      for (const card of cards) {
        try {
          const price = textOf(card, '.result_price');

          // Parse price (format: "3500元/月" or similar). Thousands separators
          // are removed first so "3,500" is read as 3500 rather than 3.
          const priceMatch = price.replace(/[,，]/g, '').match(/\d+/);
          const priceNum = priceMatch ? Number.parseInt(priceMatch[0], 10) : 0;

          // Check for equipment keywords anywhere in the card text
          const fullText = card.textContent.toLowerCase();

          results.push({
            title: textOf(card, '.result_title'),
            price,
            priceNum,
            location: textOf(card, '.result_address'),
            url: card.querySelector('a')?.href || '',
            image: card.querySelector('img')?.src || '',

            // Equipment flags (need to inspect actual HTML)
            hasKitchen: fullText.includes('厨房') || fullText.includes('kitchen'),
            hasFridge: fullText.includes('冰箱') || fullText.includes('fridge'),
            hasWashingMachine: fullText.includes('洗衣机') || fullText.includes('washing'),
            hasMetro: fullText.includes('地铁') || fullText.includes('metro'),

            // Metro info (placeholders: not populated by this extraction)
            nearestStation: '',
            metroLines: [],
            estimatedMetroTime: 999
          });
        } catch (err) {
          // One malformed card must not abort the whole extraction.
          console.error('Error parsing listing:', err);
        }
      }

      return results;
    });

    console.log(`📊 Found ${listings.length} listings`);

    // Filter and score
    const filtered = listings
      // priceNum === 0 means the price could not be parsed.
      .filter(
        (l) =>
          l.priceNum > 0 &&
          l.priceNum >= CONFIG.budgetMin &&
          l.priceNum <= CONFIG.budgetMax
      )
      .filter((l) => l.hasKitchen && l.hasFridge) // Must-have
      .map((l) => ({ ...l, score: scoreListing(l) }))
      .sort((a, b) => b.score - a.score) // Higher score = better
      .slice(0, CONFIG.topN);

    console.log(`✅ Filtered to ${filtered.length} suitable options`);

    // Save results
    fs.writeFileSync(CONFIG.outputFile, JSON.stringify(filtered, null, 2));

    // Generate markdown table
    const markdown = generateMarkdown(filtered);
    fs.writeFileSync(CONFIG.outputMarkdown, markdown);

    console.log(`💾 Results saved to:`);
    console.log(` - ${CONFIG.outputFile}`);
    console.log(` - ${CONFIG.outputMarkdown}`);

    // Print top 5
    console.log('\n🏆 TOP 5 OPTIONS:');
    filtered.slice(0, 5).forEach((l, i) => {
      console.log(`\n${i + 1}. ${l.title}`);
      console.log(` 💰 ${l.price} (${l.priceNum} RMB)`);
      console.log(` 📍 ${l.location}`);
      console.log(` ⭐ Score: ${l.score}`);
      console.log(` 🔗 ${l.url}`);
    });
  } finally {
    await browser.close();
  }
}
|
|
|
|
/**
 * Render the ranked listings as a Markdown report: a header with the search
 * parameters from CONFIG, one table row per listing, and a legend.
 *
 * @param {Array<object>} listings - Scored listings, already in rank order.
 *   Each needs priceNum, location, hasKitchen, hasFridge, hasWashingMachine,
 *   hasMetro, score and url.
 * @returns {string} The Markdown document.
 */
function generateMarkdown(listings) {
  const mark = (flag) => (flag ? '✓' : '✗');

  // Scraped text can contain newlines or '|' characters, either of which
  // would break the table row. Collapse whitespace, truncate, then escape
  // pipes (in that order, so an escape sequence is never cut in half).
  const locationCell = (value) =>
    String(value ?? '')
      .replace(/\s+/g, ' ')
      .trim()
      .substring(0, 30)
      .replace(/\|/g, '\\|');

  let md = '# Xiaozhu Search Results - Xujiahui Campus Area\n\n';
  md += `**Search Date:** ${new Date().toLocaleDateString()}\n`;
  md += `**Check-in:** ${CONFIG.checkIn}\n`;
  md += `**Check-out:** ${CONFIG.checkOut}\n`;
  md += `**Budget:** ${CONFIG.budgetMin}-${CONFIG.budgetMax} RMB/month (ideal: ${CONFIG.budgetIdeal})\n\n`;

  md += '| Rank | Price | Location | Kitchen | Fridge | Washer | Metro | Score | Link |\n';
  md += '|------|-------|----------|---------|--------|--------|-------|-------|------|\n';

  listings.forEach((l, i) => {
    const cells = [
      i + 1,
      `${l.priceNum} RMB`,
      locationCell(l.location),
      mark(l.hasKitchen),
      mark(l.hasFridge),
      mark(l.hasWashingMachine),
      mark(l.hasMetro),
      l.score,
      `[View](${l.url})`
    ];
    md += `| ${cells.join(' | ')} |\n`;
  });

  md += '\n## Legend\n';
  md += '- **Kitchen**: 厨房 required\n';
  md += '- **Fridge**: 冰箱 required\n';
  md += '- **Washer**: 洗衣机 bonus\n';
  md += '- **Metro**: Near metro station bonus\n';
  md += '- **Score**: Higher = better (price + amenities + location)\n';

  return md;
}
|
|
|
|
// Main execution: LOGIN_MODE=true runs the interactive login flow, anything
// else runs the scrape. Failures are logged and reported through a non-zero
// exit code instead of surfacing as an unhandled promise rejection.
(async () => {
  try {
    if (process.env.LOGIN_MODE === 'true') {
      await loginMode();
    } else {
      await scrapXiaozhu();
    }
  } catch (err) {
    console.error('❌ Fatal error:', err);
    process.exitCode = 1;
  }
})();
|