personal-hub/tools/xiaozhu_scraper.js
StillHammer 3c8162c990 Sync couple_matters: December crisis, separation agreement, daily check v2, xiaozhu search
Major updates:
- December 2025 crisis documentation and separation agreement
- Daily check system v2 with multiple card categories
- Xiaozhu rental search tools and results
- Exit plan documentation
- Message drafts for family communication
- Confluent moved to CONSTANT
- Updated profiles and promises

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-23 07:04:02 +08:00

269 lines
8.6 KiB
JavaScript

const puppeteer = require('puppeteer');
const fs = require('fs');
/**
* Xiaozhu Scraper - Apartment rental near the Jiaoda Xujiahui Campus
* Dates: 24 Dec 2025 → 22 Jan 2026
* Budget: 3000-5000 RMB/month (ideally 3000-4000)
* Criteria: kitchen + fridge required, washing machine is a bonus
*/
// Search settings read by the scraper and by the Markdown report generator.
const CONFIG = {
// Rental dates (in the visible code these are only echoed in the report header)
checkIn: '2025-12-24',
checkOut: '2026-01-22',
// Budget (RMB/month): listings priced above budgetMax are discarded,
// budgetIdeal is the pivot of the price score, budgetMin is only printed in the report
budgetMin: 0,
budgetMax: 5000,
budgetIdeal: 4000,
// Location (Xujiahui campus)
targetDistrict: '徐汇区', // Xuhui District; interpolated into the search URL
targetMetroLines: [1, 7, 9, 10, 11], // Accessible lines
maxMetroTime: 25, // maximum minutes
// Required amenities
// NOTE(review): required, bonus, targetMetroLines and maxMetroTime are not read
// by the code visible in this file — the filters are hard-coded; confirm before relying on them
required: ['kitchen', 'fridge'],
bonus: ['washingMachine', 'metro'],
// Output
outputFile: './xiaozhu_results.json',
outputMarkdown: './xiaozhu_results.md',
topN: 20 // maximum number of ranked listings kept
};
// Metro stations near the campus (Xujiahui).
// Each entry holds the station name, the metro lines serving it, and a minutes value
// (presumably the travel time to the campus — TODO confirm).
// NOTE(review): this table is not referenced by the code visible in this file.
const PREFERRED_STATIONS = [
{ name: '交通大学', lines: [10, 11], minutes: 0 },
{ name: '徐家汇', lines: [1, 9, 11], minutes: 5 },
{ name: '衡山路', lines: [1], minutes: 10 },
{ name: '常熟路', lines: [1, 7], minutes: 10 },
{ name: '上海体育馆', lines: [1, 4], minutes: 15 },
{ name: '龙华', lines: [11, 12], minutes: 15 },
{ name: '漕河泾开发区', lines: [9], minutes: 20 },
{ name: '七宝', lines: [9], minutes: 25 }
];
/**
 * Load the session cookies previously saved to ./xiaozhu_cookies.json.
 *
 * A missing file, an unreadable/corrupt file and a file whose JSON is not an
 * array are reported with distinct messages, followed by the login
 * instructions. The array check matters because the caller spreads the result
 * into page.setCookie(...cookies).
 *
 * @returns {Promise<Array<object>|null>} The parsed cookie array, or null when
 *   no usable cookie file is available.
 */
async function loadCookies() {
  const cookiePath = './xiaozhu_cookies.json';
  let problem;

  try {
    const parsed = JSON.parse(fs.readFileSync(cookiePath, 'utf8'));
    if (Array.isArray(parsed)) {
      return parsed;
    }
    problem = `❌ ${cookiePath} does not contain a cookie array. Please login again to regenerate it.`;
  } catch (err) {
    // ENOENT is the expected "never logged in" case; anything else (bad JSON,
    // permissions, ...) is surfaced with its real cause instead of being
    // mislabelled as a missing file.
    problem = err.code === 'ENOENT'
      ? '❌ Cookies not found. Please login first and save cookies.'
      : `❌ Could not load cookies from ${cookiePath}: ${err.message}`;
  }

  console.log(problem);
  console.log('Instructions:');
  console.log('1. Run this script with LOGIN_MODE=true');
  console.log('2. Login manually when browser opens');
  console.log('3. Press Enter when done to save cookies');
  return null;
}
/**
 * Write the cookies currently held by the given page to ./xiaozhu_cookies.json
 * as pretty-printed JSON (2-space indent).
 *
 * @param {object} page - Object exposing an async cookies() method (the Puppeteer page).
 * @returns {Promise<void>}
 */
async function saveCookies(page) {
  const serialized = JSON.stringify(await page.cookies(), null, 2);
  fs.writeFileSync('./xiaozhu_cookies.json', serialized);
  console.log('✅ Cookies saved to xiaozhu_cookies.json');
}
/**
 * Interactive login flow: opens a visible browser on the Xiaozhu home page,
 * waits for the user to log in and press Enter in the terminal, then saves the
 * page cookies via saveCookies().
 *
 * The browser is closed even if navigation or saving fails.
 *
 * @returns {Promise<void>}
 */
async function loginMode() {
  console.log('🔐 LOGIN MODE - Manual login required');
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://www.xiaozhu.com');
    console.log('📝 Please login manually in the browser...');
    console.log('⏸️ Press Enter when you are logged in');

    // Wait for user input
    await new Promise((resolve) => {
      process.stdin.once('data', resolve);
    });
    // Attaching a 'data' listener puts stdin in flowing mode; pause it again so
    // the open stream does not keep the process alive after we are done.
    process.stdin.pause();

    await saveCookies(page);
  } finally {
    // Always release the browser, including on errors above.
    await browser.close();
  }

  console.log('✅ Login complete! Run the script again without LOGIN_MODE');
}
/**
 * Compute the ranking score of a listing (higher is better).
 *
 * - Price: +1 per 100 RMB below CONFIG.budgetIdeal, -1 per 50 RMB above it.
 * - Amenities: +10 for a washing machine, +15 for a metro mention.
 * - Metro time: -0.5 per estimated minute. The extraction step currently
 *   leaves estimatedMetroTime at its 999 placeholder, so this term is the same
 *   constant offset for every listing and does not affect the ordering.
 *
 * @param {object} listing - Listing produced by the extraction step.
 * @returns {number} Score rounded to one decimal place.
 */
function computeListingScore(listing) {
  let score = 0;

  if (listing.priceNum <= CONFIG.budgetIdeal) {
    score += (CONFIG.budgetIdeal - listing.priceNum) / 100; // Cheaper = better
  } else {
    score -= (listing.priceNum - CONFIG.budgetIdeal) / 50; // Over ideal = penalty
  }

  if (listing.hasWashingMachine) score += 10;
  if (listing.hasMetro) score += 15;

  score -= listing.estimatedMetroTime * 0.5;

  return Math.round(score * 10) / 10;
}

/**
 * Keep listings with a parsed price within CONFIG.budgetMax that mention both
 * a kitchen and a fridge, attach a score to each, and return the best
 * CONFIG.topN sorted by descending score.
 *
 * @param {Array<object>} listings - Raw listings from the extraction step.
 * @returns {Array<object>} Scored listings, best first.
 */
function rankListings(listings) {
  return listings
    .filter((l) => l.priceNum > 0 && l.priceNum <= CONFIG.budgetMax)
    .filter((l) => l.hasKitchen && l.hasFridge) // Must-have
    .map((l) => ({ ...l, score: computeListingScore(l) }))
    .sort((a, b) => b.score - a.score) // Higher score = better
    .slice(0, CONFIG.topN);
}

/**
 * Scrape the Xiaozhu search page for the configured district, rank the
 * listings, write the results to CONFIG.outputFile (JSON) and
 * CONFIG.outputMarkdown (Markdown), and print the top 5 to the console.
 *
 * Returns early (after printing a hint) when no saved cookies are available.
 * The browser is always closed, including when navigation or extraction throws.
 *
 * @returns {Promise<void>}
 */
async function scrapXiaozhu() {
  const cookies = await loadCookies();
  if (!cookies) {
    console.log('Run: LOGIN_MODE=true node xiaozhu_scraper.js');
    return;
  }

  const browser = await puppeteer.launch({
    headless: false, // Set to true for production
    defaultViewport: null
  });

  let listings;
  try {
    const page = await browser.newPage();

    // Reuse the saved login session
    await page.setCookie(...cookies);

    // Navigate to search page
    // Note: URL structure needs to be determined based on actual Xiaozhu website
    // This is a placeholder - we'll need to inspect the actual site
    const searchUrl = `https://www.xiaozhu.com/search-shanghai-${CONFIG.targetDistrict}/`;
    console.log(`🔍 Searching: ${searchUrl}`);
    await page.goto(searchUrl, { waitUntil: 'networkidle2' });

    // Wait for listings to load (best effort: a timeout only produces a warning)
    // Selector needs to be determined by inspecting the actual page
    await page.waitForSelector('.result_list', { timeout: 10000 }).catch(() => {
      console.log('⚠️ Timeout waiting for listings. Page structure might have changed.');
    });

    // Extract listings. This callback runs inside the browser page, so it must
    // not reference anything from the Node.js scope.
    listings = await page.evaluate(() => {
      const results = [];
      // This selector needs to be updated based on actual Xiaozhu HTML structure
      const cards = document.querySelectorAll('.result_list .result_item');

      cards.forEach((card) => {
        try {
          const listing = {
            title: card.querySelector('.result_title')?.textContent?.trim() || '',
            price: card.querySelector('.result_price')?.textContent?.trim() || '',
            priceNum: 0, // Parsed from the price string below
            location: card.querySelector('.result_address')?.textContent?.trim() || '',
            url: card.querySelector('a')?.href || '',
            image: card.querySelector('img')?.src || '',
            // Equipment flags (need to inspect actual HTML)
            hasKitchen: false,
            hasFridge: false,
            hasWashingMachine: false,
            hasMetro: false,
            // Metro info (placeholders, not filled in yet)
            nearestStation: '',
            metroLines: [],
            estimatedMetroTime: 999
          };

          // Parse price (format: "3500元/月" or similar). Thousands separators
          // are stripped first so "3,500元" parses as 3500 rather than 3.
          const priceMatch = listing.price.replace(/[,，]/g, '').match(/\d+/);
          if (priceMatch) {
            listing.priceNum = Number.parseInt(priceMatch[0], 10);
          }

          // Check for equipment keywords anywhere in the card text
          const fullText = card.textContent.toLowerCase();
          listing.hasKitchen = fullText.includes('厨房') || fullText.includes('kitchen');
          listing.hasFridge = fullText.includes('冰箱') || fullText.includes('fridge');
          listing.hasWashingMachine = fullText.includes('洗衣机') || fullText.includes('washing');
          listing.hasMetro = fullText.includes('地铁') || fullText.includes('metro');

          results.push(listing);
        } catch (err) {
          // Logged to the page console; one bad card must not abort the rest
          console.error('Error parsing listing:', err);
        }
      });

      return results;
    });
  } finally {
    // Everything needed has been copied out of the page (or an error occurred):
    // release the browser in both cases.
    await browser.close();
  }

  console.log(`📊 Found ${listings.length} listings`);

  // Filter and score
  const filtered = rankListings(listings);
  console.log(`✅ Filtered to ${filtered.length} suitable options`);

  // Save results
  fs.writeFileSync(CONFIG.outputFile, JSON.stringify(filtered, null, 2));

  // Generate markdown table
  const markdown = generateMarkdown(filtered);
  fs.writeFileSync(CONFIG.outputMarkdown, markdown);

  console.log(`💾 Results saved to:`);
  console.log(` - ${CONFIG.outputFile}`);
  console.log(` - ${CONFIG.outputMarkdown}`);

  // Print top 5
  console.log('\n🏆 TOP 5 OPTIONS:');
  filtered.slice(0, 5).forEach((l, i) => {
    console.log(`\n${i + 1}. ${l.title}`);
    console.log(` 💰 ${l.price} (${l.priceNum} RMB)`);
    console.log(` 📍 ${l.location}`);
    console.log(` ⭐ Score: ${l.score}`);
    console.log(` 🔗 ${l.url}`);
  });
}
/**
 * Render the ranked listings as a Markdown report: a header with the search
 * parameters from CONFIG, one table row per listing, and a legend.
 *
 * Location text comes from scraped HTML, so it is normalized before being put
 * in a table cell: whitespace runs (including newlines) are collapsed, the
 * text is truncated to 30 characters, and `|` is escaped. Without this a
 * single listing could break the whole table.
 *
 * @param {Array<object>} listings - Scored listings (priceNum, location,
 *   hasKitchen, hasFridge, hasWashingMachine, hasMetro, score, url).
 * @returns {string} The Markdown document, ending with a newline.
 */
function generateMarkdown(listings) {
  // Truncate before escaping so an escape sequence is never cut in half.
  const toCell = (text, maxLength) =>
    String(text ?? '')
      .replace(/\s+/g, ' ')
      .trim()
      .substring(0, maxLength)
      .replace(/\|/g, '\\|');
  const mark = (flag) => (flag ? '✓' : '✗');

  const rows = listings.map((l, i) =>
    [
      `| ${i + 1} `,
      `| ${l.priceNum} RMB `,
      `| ${toCell(l.location, 30)} `,
      `| ${mark(l.hasKitchen)} `,
      `| ${mark(l.hasFridge)} `,
      `| ${mark(l.hasWashingMachine)} `,
      `| ${mark(l.hasMetro)} `,
      `| ${l.score} `,
      `| [View](${l.url}) |`
    ].join('')
  );

  const lines = [
    '# Xiaozhu Search Results - Xujiahui Campus Area',
    '',
    `**Search Date:** ${new Date().toLocaleDateString()}`,
    `**Check-in:** ${CONFIG.checkIn}`,
    `**Check-out:** ${CONFIG.checkOut}`,
    `**Budget:** ${CONFIG.budgetMin}-${CONFIG.budgetMax} RMB/month (ideal: ${CONFIG.budgetIdeal})`,
    '',
    '| Rank | Price | Location | Kitchen | Fridge | Washer | Metro | Score | Link |',
    '|------|-------|----------|---------|--------|--------|-------|-------|------|',
    ...rows,
    '',
    '## Legend',
    '- **Kitchen**: 厨房 required',
    '- **Fridge**: 冰箱 required',
    '- **Washer**: 洗衣机 bonus',
    '- **Metro**: Near metro station bonus',
    '- **Score**: Higher = better (price + amenities + location)'
  ];

  return `${lines.join('\n')}\n`;
}
// Main execution: LOGIN_MODE=true runs the interactive login flow, anything
// else runs the scraper. Failures are logged and reported through a non-zero
// exit code instead of surfacing as an unhandled promise rejection.
(async () => {
  try {
    if (process.env.LOGIN_MODE === 'true') {
      await loginMode();
    } else {
      await scrapXiaozhu();
    }
  } catch (err) {
    console.error('❌ Fatal error:', err);
    process.exitCode = 1;
  }
})();