personal-hub/tools/xiaozhu_minsu_scraper.js
StillHammer 3c8162c990 Sync couple_matters: December crisis, separation agreement, daily check v2, xiaozhu search
Major updates:
- December 2025 crisis documentation and separation agreement
- Daily check system v2 with multiple card categories
- Xiaozhu rental search tools and results
- Exit plan documentation
- Message drafts for family communication
- Confluent moved to CONSTANT
- Updated profiles and promises

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-23 07:04:02 +08:00

400 lines
13 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const puppeteer = require('puppeteer');
const fs = require('fs');
/**
* Xiaozhu Minsu Scraper - Client interface scraper
* URL: https://minsu.xiaozhu.com/
*
* Critères:
* - Xujiahui District (徐汇区) - près de Jiaotong University
* - 24 déc 2025 → 22 jan 2026 (29 jours)
* - Budget: 3000-5000 RMB/mois (idéal 3000-4000)
* - Must-have: Cuisine + frigo
* - Nice: Machine à laver, proche métro
*/
/**
 * Search criteria, scoring inputs and output paths used by the scraper.
 *
 * `days` is derived from `checkIn`/`checkOut` so the stay length cannot drift
 * from the dates. The two `dailyBudget*` getters keep their historical names,
 * but what they return is the monthly budget prorated to the stay, i.e. a
 * total for the whole stay in RMB, not a per-day amount.
 */
const CONFIG = {
  // Dates (ISO yyyy-mm-dd; date-only ISO strings are parsed as UTC midnight)
  checkIn: '2025-12-24',
  checkOut: '2026-01-22',

  // Location
  city: '上海',
  district: '徐汇区',
  keyword: '交通大学', // Near Jiaotong University

  // Budget (RMB/month)
  budgetMin: 0,
  budgetMax: 5000,
  budgetIdeal: 4000,

  // Length of the stay in days, computed from the dates above
  get days() {
    const msPerDay = 24 * 60 * 60 * 1000;
    const elapsed = Date.parse(this.checkOut) - Date.parse(this.checkIn);
    return Math.round(elapsed / msPerDay);
  },

  // Upper budget for the whole stay: monthly max prorated over a 30-day month
  get dailyBudgetMax() {
    return Math.ceil(this.budgetMax / 30 * this.days);
  },

  // Ideal budget for the whole stay: monthly ideal prorated over a 30-day month
  get dailyBudgetIdeal() {
    return Math.ceil(this.budgetIdeal / 30 * this.days);
  },

  // Equipment
  required: ['厨房', '冰箱'], // Kitchen, fridge
  bonus: ['洗衣机', '地铁'], // Washing machine, metro

  // Output
  outputFile: './xiaozhu_minsu_results.json',
  outputMarkdown: './xiaozhu_minsu_results.md',
  topN: 20
};
// Print the budget summary once at startup. The last line shows the monthly
// budget prorated to the stay, which is a total for the whole stay (not a
// per-day amount), so it is labelled as the stay budget.
console.log('💰 Budget calculation:');
console.log(` Monthly budget: ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB`);
console.log(` Stay duration: ${CONFIG.days} days`);
console.log(` Stay budget: ${CONFIG.dailyBudgetIdeal}-${CONFIG.dailyBudgetMax} RMB total`);
/**
 * Load exported browser cookies from ./xiaozhu_cookies.json.
 *
 * The function never throws: every failure is reported on the console together
 * with the setup instructions, and `null` is returned so the caller can carry
 * on without cookies.
 *
 * @returns {Promise<Array<object>|null>} The parsed cookie array, or `null`
 *   when the file is missing, unreadable, not valid JSON, or not a JSON array.
 */
async function loadCookies() {
  const cookieFile = './xiaozhu_cookies.json';

  const printSetupHelp = () => {
    console.log('\n📋 SETUP REQUIRED:');
    console.log('1. Go to https://minsu.xiaozhu.com/ in Firefox');
    console.log('2. Login if needed');
    console.log('3. Extract cookies using firefox_cookie_converter.js');
    console.log('4. Run this script again\n');
  };

  let raw;
  try {
    raw = fs.readFileSync(cookieFile, 'utf8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      console.log('⚠️ No cookies found!');
    } else {
      // The file exists but could not be read (permissions, is a directory, ...).
      console.log(`⚠️ Could not read ${cookieFile}: ${err.message}`);
    }
    printSetupHelp();
    return null;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    console.log(`⚠️ ${cookieFile} is not valid JSON: ${err.message}`);
    printSetupHelp();
    return null;
  }

  // The caller spreads the result into page.setCookie(), so only an array is usable.
  if (!Array.isArray(parsed)) {
    console.log(`⚠️ ${cookieFile} must contain a JSON array of cookies`);
    printSetupHelp();
    return null;
  }

  return parsed;
}
/** Resolve after `ms` milliseconds; used to give the page time to render. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Try a list of candidate search URLs until one looks like a listing page.
 *
 * A URL is accepted when the page shows more than two elements for one of the
 * probe selectors, or when the final URL contains "search" or "shanghai".
 * Navigation failures are logged and the next candidate is tried.
 *
 * @param {object} page - Puppeteer page to navigate.
 * @returns {Promise<boolean>} true when a candidate URL was accepted.
 */
async function openSearchPage(page) {
  const candidateUrls = [
    `https://minsu.xiaozhu.com/search-shanghai-${CONFIG.district}/`,
    `https://minsu.xiaozhu.com/shanghai/${CONFIG.district}/`,
    `https://minsu.xiaozhu.com/search?city=shanghai&district=${CONFIG.district}`,
    'https://minsu.xiaozhu.com/shanghai/',
    'https://minsu.xiaozhu.com/'
  ];

  for (const url of candidateUrls) {
    console.log(`\n🔍 Trying: ${url}`);
    try {
      await page.goto(url, {
        waitUntil: 'networkidle2',
        timeout: 20000
      });
      const currentUrl = page.url();
      console.log(`✅ Loaded: ${currentUrl}`);

      // Wait for content
      await sleep(3000);

      // Runs inside the page: probe a few likely listing selectors.
      const hasListings = await page.evaluate(() => {
        const possibleSelectors = [
          '.room-list', '.house-list', '.list-item',
          '[class*="room"]', '[class*="house"]', '[class*="result"]'
        ];
        return possibleSelectors.some(
          (sel) => document.querySelectorAll(sel).length > 2
        );
      });

      if (hasListings || currentUrl.includes('search') || currentUrl.includes('shanghai')) {
        return true;
      }
    } catch (err) {
      console.log(` ❌ Failed: ${err.message}`);
    }
  }
  return false;
}

/**
 * Find a search box on the current page, type "<city> <district>" into it and
 * press Enter.
 *
 * @param {object} page - Puppeteer page to interact with.
 * @returns {Promise<boolean>} true when an input was found and the query was
 *   submitted.
 */
async function submitSearchQuery(page) {
  const searchInputSelectors = [
    'input[placeholder*="目的地"]',
    'input[placeholder*="搜索"]',
    'input[placeholder*="城市"]',
    'input.search-input',
    '#search-input',
    'input[type="text"]'
  ];

  for (const selector of searchInputSelectors) {
    try {
      const input = await page.$(selector);
      if (!input) continue;

      console.log(`✅ Found search input: ${selector}`);
      // Type search query
      await input.click();
      await page.keyboard.type(`${CONFIG.city} ${CONFIG.district}`, { delay: 100 });
      await sleep(1500);
      // Try to submit
      await page.keyboard.press('Enter');
      await sleep(3000);
      return true;
    } catch (err) {
      // Best effort: one unusable input must not abort the run, but report it
      // instead of hiding the failure, then fall through to the next selector.
      console.log(` ⚠️ Search input ${selector} failed: ${err.message}`);
    }
  }
  return false;
}

/**
 * Extract listing data from the DOM.
 *
 * This function is passed to page.evaluate(), so it executes inside the
 * browser page and must stay self-contained: it cannot use anything from the
 * Node.js side (CONFIG, fs, helpers defined in this file).
 *
 * @returns {Array<object>} Up to 50 listings that have a title or a price.
 */
function extractListingsFromDom() {
  const maxListings = 50;

  // Possible selectors for listing items
  const containerSelectors = [
    '.pho_item', '.room_box', '.house-item', '.result-item',
    '[class*="room-item"]', '[class*="house-item"]',
    '[class*="card"]'
  ];

  let listingElements = [];
  for (const selector of containerSelectors) {
    const elements = document.querySelectorAll(selector);
    if (elements.length > 2 && elements.length < 200) {
      listingElements = Array.from(elements);
      // Logged to the page's console, not to the Node.js terminal.
      console.log(`Found ${elements.length} items with selector: ${selector}`);
      break;
    }
  }

  // First <span> under `root` whose text contains `needle`. Plain CSS has no
  // ":contains()" pseudo-class, so the text match is done by hand.
  const findSpanContaining = (root, needle) =>
    Array.from(root.querySelectorAll('span')).find((span) =>
      span.textContent.includes(needle)
    ) || null;

  // Price element lookups, tried in order until one yields a number.
  const priceLookups = [
    (root) => root.querySelector('[class*="price"]'),
    (root) => root.querySelector('[class*="money"]'),
    (root) => findSpanContaining(root, '¥'),
    (root) => findSpanContaining(root, '￥'), // full-width yen sign
    (root) => findSpanContaining(root, '元')
  ];

  // First integer in the text, accepting thousands separators ("1,200" -> 1200).
  const parsePrice = (text) => {
    const match = text.match(/\d[\d,]*/);
    if (!match) return null;
    return Number.parseInt(match[0].replace(/,/g, ''), 10);
  };

  const results = [];
  listingElements.slice(0, maxListings).forEach((item, index) => {
    const fullText = item.textContent;
    const listing = {
      index: index + 1,
      raw: fullText.substring(0, 300)
    };

    // Extract title
    const titleEl = item.querySelector('h2, h3, h4, [class*="title"], [class*="name"]');
    if (titleEl) {
      listing.title = titleEl.textContent.trim();
    }

    // Extract price
    for (const lookup of priceLookups) {
      const priceEl = lookup(item);
      if (!priceEl) continue;
      const priceText = priceEl.textContent;
      const price = parsePrice(priceText);
      if (price !== null) {
        listing.priceDaily = price;
        listing.priceText = priceText;
        break;
      }
    }

    // Extract location
    const locationEl = item.querySelector('[class*="location"], [class*="address"], [class*="area"]');
    if (locationEl) {
      listing.location = locationEl.textContent.trim();
    }

    // Extract URL
    const linkEl = item.querySelector('a');
    if (linkEl) {
      listing.url = linkEl.href;
    }

    // Extract image
    const imgEl = item.querySelector('img');
    if (imgEl) {
      listing.image = imgEl.src;
    }

    // Check for equipment keywords
    listing.hasKitchen = fullText.includes('厨房') || fullText.includes('kitchen');
    listing.hasFridge = fullText.includes('冰箱') || fullText.includes('fridge');
    listing.hasWashingMachine = fullText.includes('洗衣机') || fullText.includes('washing');
    listing.hasMetro = fullText.includes('地铁') || fullText.includes('metro');

    if (listing.title || listing.priceDaily) {
      results.push(listing);
    }
  });

  return results;
}

/**
 * Score, filter and sort the extracted listings.
 *
 * Keeps only listings with a positive daily price, a kitchen and a fridge,
 * whose total for the stay is within CONFIG.dailyBudgetMax; returns at most
 * CONFIG.topN of them, best score first.
 *
 * @param {Array<object>} listings - Listings returned by extractListingsFromDom.
 * @returns {Array<object>} Copies of the listings with `priceTotal`,
 *   `priceMonthly` and `score` added.
 */
function rankListings(listings) {
  // Despite their names, these two CONFIG values are totals for the whole stay
  // (they are computed from the monthly budget and CONFIG.days).
  const stayBudgetIdeal = CONFIG.dailyBudgetIdeal;
  const stayBudgetMax = CONFIG.dailyBudgetMax;

  return listings
    .filter((l) => l.priceDaily > 0)
    .map((l) => {
      const priceTotal = l.priceDaily * CONFIG.days;
      const priceMonthly = Math.ceil(l.priceDaily * 30);

      // Score calculation
      let score = 0;

      // Price score
      if (priceTotal <= stayBudgetIdeal) {
        score += (stayBudgetIdeal - priceTotal) / 100;
      } else if (priceTotal <= stayBudgetMax) {
        score -= (priceTotal - stayBudgetIdeal) / 50;
      } else {
        score -= 100; // Over budget penalty
      }

      // Equipment bonuses
      if (l.hasKitchen) score += 20;
      if (l.hasFridge) score += 15;
      if (l.hasWashingMachine) score += 10;
      if (l.hasMetro) score += 15;

      // Location bonus (if contains keyword)
      if (l.location && l.location.includes(CONFIG.district)) score += 20;
      if (l.title && l.title.includes(CONFIG.keyword)) score += 10;

      return {
        ...l,
        priceTotal,
        priceMonthly,
        score: Math.round(score * 10) / 10
      };
    })
    .filter((l) => l.hasKitchen && l.hasFridge) // Must-have requirements
    .filter((l) => l.priceTotal <= stayBudgetMax) // Budget filter
    .sort((a, b) => b.score - a.score)
    .slice(0, CONFIG.topN);
}

/** Print the five best ranked listings to the console. */
function printTopListings(ranked) {
  const mark = (flag) => (flag ? '✓' : '✗');

  console.log('\n🏆 TOP 5 OPTIONS:\n');
  ranked.slice(0, 5).forEach((l, i) => {
    console.log(`${i + 1}. ${l.title || 'No title'}`);
    console.log(` 💰 ${l.priceDaily} RMB/day × ${CONFIG.days} days = ${l.priceTotal} RMB total (~${l.priceMonthly} RMB/month)`);
    if (l.location) console.log(` 📍 ${l.location}`);
    console.log(` ✓ Kitchen: ${mark(l.hasKitchen)} | Fridge: ${mark(l.hasFridge)} | Washer: ${mark(l.hasWashingMachine)} | Metro: ${mark(l.hasMetro)}`);
    console.log(` ⭐ Score: ${l.score}`);
    if (l.url) console.log(` 🔗 ${l.url}`);
    console.log('');
  });
}

/**
 * Scrape minsu.xiaozhu.com for listings matching CONFIG and write the results.
 *
 * Steps: load cookies, launch a headless browser, open a search page, submit a
 * search query if an input is found, extract listings from the DOM, then rank
 * them and write CONFIG.outputFile (JSON) and CONFIG.outputMarkdown. When no
 * listing is extracted, the page HTML is saved for manual inspection instead.
 * Screenshots are saved before the search and at the end.
 *
 * Errors raised after the browser has been launched are logged, not rethrown,
 * and the browser is always closed.
 *
 * @returns {Promise<void>}
 */
async function scrapMinsuXiaozhu() {
  const cookies = await loadCookies();

  const browser = await puppeteer.launch({
    headless: 'new',
    defaultViewport: { width: 1920, height: 1080 },
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  // Everything that uses the browser lives inside the try so that a failure
  // during page setup cannot leave the browser process running.
  try {
    const page = await browser.newPage();

    // Set user agent
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    // Load cookies if available
    if (cookies && cookies.length > 0) {
      await page.setCookie(...cookies);
      console.log(`🍪 Loaded ${cookies.length} cookies`);
    }

    // Try different search URL patterns
    const pageLoaded = await openSearchPage(page);
    if (!pageLoaded) {
      console.log('\n❌ Could not load search page. Trying homepage navigation...');
      await page.goto('https://minsu.xiaozhu.com/', { waitUntil: 'networkidle2' });
    }

    // Take screenshot
    await page.screenshot({ path: './xiaozhu_minsu_page.png', fullPage: true });
    console.log('📸 Screenshot saved: xiaozhu_minsu_page.png');

    // Try to find and use search functionality
    console.log('\n🔍 Looking for search input...');
    const searchFound = await submitSearchQuery(page);
    if (!searchFound) {
      console.log('⚠️ No search input found, will extract from current page');
    }

    // Extract listings
    console.log('\n📊 Extracting listings...');
    const listings = await page.evaluate(extractListingsFromDom);
    console.log(`✅ Extracted ${listings.length} listings`);

    if (listings.length === 0) {
      console.log('\n❌ No listings found. Saving page HTML for manual inspection...');
      const html = await page.content();
      fs.writeFileSync('./xiaozhu_minsu_page.html', html);
      console.log('💾 HTML saved to: xiaozhu_minsu_page.html');
    } else {
      const filtered = rankListings(listings);
      console.log(`\n✅ Filtered to ${filtered.length} suitable options`);

      // Save results
      fs.writeFileSync(CONFIG.outputFile, JSON.stringify(filtered, null, 2));
      console.log(`💾 Results saved to: ${CONFIG.outputFile}`);

      // Generate markdown
      const markdown = generateMarkdown(filtered);
      fs.writeFileSync(CONFIG.outputMarkdown, markdown);
      console.log(`📝 Markdown saved to: ${CONFIG.outputMarkdown}`);

      printTopListings(filtered);
    }

    // Save final screenshot
    await page.screenshot({ path: './xiaozhu_minsu_final.png', fullPage: true });
    console.log('📸 Final screenshot: xiaozhu_minsu_final.png');
  } catch (err) {
    console.error('❌ Error:', err.message);
    console.error(err.stack);
  } finally {
    await browser.close();
  }
}
/**
 * Render the ranked listings as a Markdown report.
 *
 * The report has a header with the search parameters taken from CONFIG, one
 * table row per listing, and a legend explaining the columns.
 *
 * @param {Array<object>} listings - Ranked listings; each may carry `title`,
 *   `priceDaily`, `priceTotal`, `hasKitchen`, `hasFridge`,
 *   `hasWashingMachine`, `hasMetro`, `score` and `url`.
 * @returns {string} The Markdown document, ending with a newline.
 */
function generateMarkdown(listings) {
  const mark = (flag) => (flag ? '✓' : '✗');

  // A raw "|" or a line break inside a cell would split the table row, so the
  // title is flattened to one line, truncated, and only then escaped (escaping
  // last keeps the truncation from cutting an escape sequence in half).
  const titleCell = (title) =>
    (title || 'No title')
      .replace(/\s+/g, ' ')
      .substring(0, 40)
      .replace(/\|/g, '\\|');

  const rows = listings.map((l, i) => {
    const link = l.url ? `[View](${l.url})` : '-';
    const cells = [
      i + 1,
      titleCell(l.title),
      `¥${l.priceDaily}`,
      `¥${l.priceTotal}`,
      mark(l.hasKitchen),
      mark(l.hasFridge),
      mark(l.hasWashingMachine),
      mark(l.hasMetro),
      l.score,
      link
    ];
    return `| ${cells.join(' | ')} |`;
  });

  const lines = [
    '# Xiaozhu Minsu Search Results - Xujiahui District',
    '',
    `**Search Date:** ${new Date().toLocaleDateString()}`,
    `**Check-in:** ${CONFIG.checkIn}`,
    `**Check-out:** ${CONFIG.checkOut}`,
    `**Duration:** ${CONFIG.days} days`,
    // These CONFIG values are totals for the whole stay, hence "Stay Budget".
    `**Stay Budget:** ${CONFIG.dailyBudgetIdeal}-${CONFIG.dailyBudgetMax} RMB total`,
    `**Monthly Equivalent:** ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB/month`,
    '',
    '| # | Title | Daily | Total | Kitchen | Fridge | Washer | Metro | Score | Link |',
    '|---|-------|-------|-------|---------|--------|--------|-------|-------|------|',
    ...rows,
    '',
    '## Legend',
    '',
    '- **Daily**: Price per day (RMB)',
    `- **Total**: Total price for ${CONFIG.days} days stay`,
    '- **Kitchen**: 厨房 (required)',
    '- **Fridge**: 冰箱 (required)',
    '- **Washer**: 洗衣机 (bonus)',
    '- **Metro**: Near metro station (bonus)',
    '- **Score**: Higher = better (price + amenities + location)'
  ];

  return `${lines.join('\n')}\n`;
}
// Run the scraper when the script is executed. Anything that still rejects
// here (for example the browser failing to launch) is logged and reported
// through a non-zero exit status so callers and shell scripts can detect it.
scrapMinsuXiaozhu().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});