couple_matters/tools/xiaozhu_interactive.js
StillHammer 92c2a9f022 Sync couple_matters: December crisis, separation agreement, daily check v2, xiaozhu search
Major updates:
- December 2025 crisis documentation and separation agreement
- Daily check system v2 with multiple card categories
- Xiaozhu rental search tools and results
- Exit plan documentation
- Message drafts for family communication
- Confluent moved to CONSTANT
- Updated profiles and promises

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-23 06:54:08 +08:00

582 lines
18 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const puppeteer = require('puppeteer');
const fs = require('fs');
/**
* Xiaozhu Interactive Scraper - Full navigation simulation
* Simulates real user behavior to navigate and extract listings
*/
/**
 * Scales a monthly amount (treated as 30 days) to a stay of `nights` nights,
 * rounding up to a whole unit.
 */
const prorateMonthlyBudget = (monthlyBudget, nights) => Math.ceil(monthlyBudget / 30 * nights);

/**
 * Settings shared by every step of the scraper: what to search for, how much
 * to spend, how aggressively to scroll, and where to write the results.
 */
const CONFIG = {
  // --- Where to search ---
  city: '上海',
  district: '徐汇区',
  keyword: '交通大学',

  // --- Stay ---
  checkIn: '2025-12-24',
  checkOut: '2026-01-22',
  days: 29,

  // --- Budget, in RMB per 30-day month ---
  budgetIdeal: 4000,
  budgetMax: 5000,

  // NOTE: despite the "daily" prefix, both getters return the budget for the
  // WHOLE stay (monthly budget prorated over `days`).
  get dailyBudgetIdeal() {
    return prorateMonthlyBudget(this.budgetIdeal, this.days);
  },
  get dailyBudgetMax() {
    return prorateMonthlyBudget(this.budgetMax, this.days);
  },

  // --- Equipment keywords ---
  required: ['厨房', '冰箱'],
  bonus: ['洗衣机', '地铁'],

  // --- Scrolling / pacing (delays in milliseconds) ---
  maxScrolls: 10,
  scrollDelay: 2000,
  interactionDelay: 1000,

  // --- Output files ---
  outputFile: './xiaozhu_results.json',
  outputMarkdown: './xiaozhu_results.md',
  topN: 20,

  // --- Debugging aids ---
  headless: true,
  screenshots: true
};
// Startup banner: echo the active search parameters before any browser work starts.
console.log('🚀 Xiaozhu Interactive Scraper');
console.log(`📍 Target: ${CONFIG.city} ${CONFIG.district}`);
// Separate the two dates explicitly; without a separator they print as one run of digits.
console.log(`📅 Dates: ${CONFIG.checkIn} → ${CONFIG.checkOut} (${CONFIG.days} days)`);
// The second pair of numbers is the monthly budget prorated over the stay length.
console.log(`💰 Budget: ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB/month (${CONFIG.dailyBudgetIdeal}-${CONFIG.dailyBudgetMax} RMB total)\n`);
/**
 * Pauses an async flow for a fixed amount of time.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Settles once the timer fires.
 */
async function wait(ms) {
  const timerElapsed = new Promise((done) => {
    setTimeout(done, ms);
  });
  return timerElapsed;
}
/**
 * Saves a full-page screenshot of `page` when screenshots are enabled in CONFIG.
 *
 * The file is written to `./xiaozhu_<name>_<timestamp>.png`, where the timestamp
 * is `Date.now()` at the time of the call. Does nothing when
 * `CONFIG.screenshots` is falsy.
 *
 * @param {object} page - Object exposing an async `screenshot({ path, fullPage })` method.
 * @param {string} name - Label embedded in the file name.
 * @returns {Promise<void>}
 */
async function screenshot(page, name) {
  if (!CONFIG.screenshots) {
    return;
  }

  const targetPath = `./xiaozhu_${name}_${Date.now()}.png`;
  const captureOptions = { path: targetPath, fullPage: true };
  await page.screenshot(captureOptions);
  console.log(`📸 Screenshot: ${targetPath}`);
}
/**
 * Loads previously saved browser cookies from `./xiaozhu_cookies.json`.
 *
 * Cookies are optional, so every failure is reported on the console and
 * results in `null` instead of an exception. A missing file, an unreadable
 * file, malformed JSON and a non-array payload are reported separately so a
 * broken cookie file is not mistaken for "no cookies".
 *
 * @returns {Promise<Array<object>|null>} The parsed cookie array, or `null`
 *   when no usable cookie file is available.
 */
async function loadCookies() {
  const cookieFile = './xiaozhu_cookies.json';

  let raw;
  try {
    raw = fs.readFileSync(cookieFile, 'utf8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      console.log('⚠️ No cookies found (optional)');
    } else {
      console.log(`⚠️ Could not read ${cookieFile}: ${err.message}`);
    }
    return null;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    console.log(`⚠️ Ignoring malformed ${cookieFile}: ${err.message}`);
    return null;
  }

  // The caller spreads the result into page.setCookie(), so only an array is usable.
  if (!Array.isArray(parsed)) {
    console.log(`⚠️ Ignoring ${cookieFile}: expected a JSON array of cookies`);
    return null;
  }
  return parsed;
}
/**
 * Builds the key used to recognise a listing that was already collected.
 * Cards without a link fall back to their (truncated) text, so they are not
 * re-added on every scroll.
 */
function listingDedupKey(listing) {
  return listing.url ? `url:${listing.url}` : `text:${listing.text}`;
}

/**
 * Collects listing cards from the DOM of the page currently displayed.
 *
 * Executed inside the browser through `page.evaluate`, so it must stay
 * self-contained: it may only use browser globals, nothing from this module.
 *
 * @returns {Array<object>} One plain object per card that could be read.
 */
function extractListingsInPage() {
  const results = [];

  // Candidate selectors for listing cards; the one matching the most elements wins.
  const cardSelectors = [
    '.house-item',
    '.room-item',
    '.van-card',
    '[class*="house"]',
    '[class*="room"]',
    '[class*="card"]'
  ];
  let items = [];
  for (const sel of cardSelectors) {
    const elements = document.querySelectorAll(sel);
    if (elements.length > items.length) {
      items = Array.from(elements);
    }
  }

  // Only valid CSS selectors here. jQuery-style `:contains()` makes
  // querySelector throw, which used to discard every card that lacked a
  // dedicated price element. Prices embedded in plain text are handled by the
  // regex fallback below instead.
  const priceSelectors = ['.price', '.van-card__price', '[class*="price"]'];

  items.forEach((item, idx) => {
    try {
      const listing = {
        index: idx + 1,
        html: item.innerHTML.substring(0, 500),
        text: item.textContent.trim().substring(0, 300)
      };

      // Title
      const titleEl = item.querySelector('h2, h3, h4, .title, .name, .van-card__title, [class*="title"]');
      if (titleEl) {
        listing.title = titleEl.textContent.trim();
      }

      // Price: first run of digits inside the first price element that has one.
      for (const selector of priceSelectors) {
        const priceEl = item.querySelector(selector);
        if (priceEl) {
          const priceText = priceEl.textContent;
          const match = priceText.match(/(\d+)/);
          if (match) {
            listing.priceDaily = Number.parseInt(match[1], 10);
            listing.priceText = priceText.trim();
            break;
          }
        }
      }

      // Fallback: digits followed by "元" or "/" anywhere in the card text,
      // optionally preceded by a yen sign (half- or full-width).
      if (!listing.priceDaily) {
        const priceMatch = item.textContent.match(/[\u00A5\uFFE5]?\s*(\d+)\s*[元\/]/);
        if (priceMatch) {
          listing.priceDaily = Number.parseInt(priceMatch[1], 10);
        }
      }

      // Location
      const locationEl = item.querySelector('.location, .address, .area, [class*="location"]');
      if (locationEl) {
        listing.location = locationEl.textContent.trim();
      }

      // Link
      const linkEl = item.querySelector('a');
      if (linkEl) {
        listing.url = linkEl.href;
      }

      // Image
      const imgEl = item.querySelector('img');
      if (imgEl) {
        listing.image = imgEl.src;
      }

      // Equipment is detected by keyword mentions in the card text.
      const fullText = item.textContent.toLowerCase();
      listing.hasKitchen = fullText.includes('厨房') || fullText.includes('kitchen');
      listing.hasFridge = fullText.includes('冰箱') || fullText.includes('fridge');
      listing.hasWashingMachine = fullText.includes('洗衣机') || fullText.includes('washing');
      listing.hasMetro = fullText.includes('地铁') || fullText.includes('metro') || fullText.includes('站');

      results.push(listing);
    } catch (e) {
      // Runs in the browser: this goes to the page console, not to Node.
      console.error('Error extracting listing:', e);
    }
  });

  return results;
}

/**
 * Drives a headless browser through the Xiaozhu mobile site: opens the home
 * page, searches for CONFIG.city, scrolls the result list to trigger lazy
 * loading, extracts the listing cards and writes the ranked results to
 * CONFIG.outputFile (JSON) and CONFIG.outputMarkdown.
 *
 * Errors raised after the browser has launched are logged (with an error
 * screenshot when possible) rather than rethrown. The browser is closed at the
 * end unless CONFIG.headless is false, in which case it is left open for
 * manual inspection.
 *
 * @returns {Promise<void>}
 */
async function scrapXiaozhu() {
  const cookies = await loadCookies();
  const browser = await puppeteer.launch({
    headless: CONFIG.headless ? "new" : false,
    defaultViewport: { width: 414, height: 896 }, // Mobile viewport (Xiaozhu is mobile-first)
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage'
    ]
  });

  // Declared outside the try so the catch block can screenshot it, while page
  // creation itself is still covered by the finally that closes the browser.
  let page = null;

  try {
    page = await browser.newPage();
    // Mobile user agent
    await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1');

    // Load cookies if available
    if (cookies && cookies.length > 0) {
      try {
        await page.setCookie(...cookies);
        console.log(`🍪 Loaded ${cookies.length} cookies\n`);
      } catch (err) {
        console.log('⚠️ Could not load cookies:', err.message);
      }
    }

    console.log('🌐 Loading homepage...');
    await page.goto('https://minsu.xiaozhu.com/', {
      waitUntil: 'networkidle2',
      timeout: 30000
    });
    await wait(3000); // Wait for Vue app to initialize
    await screenshot(page, 'homepage');
    console.log('✅ Homepage loaded\n');

    // Strategy 1: Look for search input
    console.log('🔍 Looking for search input...');
    const searchSelectors = [
      'input[placeholder*="目的地"]',
      'input[placeholder*="搜索"]',
      'input[placeholder*="城市"]',
      '.search-input',
      '.van-search__content input',
      'input[type="search"]',
      'input[type="text"]'
    ];
    let searchInput = null;
    for (const selector of searchSelectors) {
      try {
        const element = await page.$(selector);
        if (element) {
          const isVisible = await page.evaluate(el => {
            const rect = el.getBoundingClientRect();
            return rect.width > 0 && rect.height > 0;
          }, element);
          if (isVisible) {
            searchInput = element;
            console.log(`✅ Found search input: ${selector}`);
            break;
          }
        }
      } catch (e) {
        // Best effort: a failing probe just means "try the next selector".
        console.log(` Selector ${selector} failed: ${e.message}`);
      }
    }

    if (searchInput) {
      console.log('⌨️ Entering search query...');
      // Click to focus
      await searchInput.click();
      await wait(500);
      // Type search query - try just city first
      await searchInput.type(`${CONFIG.city}`, { delay: 150 });
      await wait(CONFIG.interactionDelay * 2); // Wait longer for suggestions to load
      await screenshot(page, 'search_typed');

      // Look for search suggestions or submit button
      console.log('👆 Looking for search button or suggestions...');
      const submitSelectors = [
        'button[type="submit"]',
        '.search-button',
        '.van-button--primary',
        'button.submit',
        '.search-btn'
      ];
      let submitted = false;

      // Try to click suggestions first
      await wait(1500);
      // Look for suggestions containing the city name
      const shanghaiClicked = await page.evaluate((city) => {
        const suggestions = document.querySelectorAll('.van-cell, .suggestion-item, [class*="suggest"], .city-item, div[class*="item"]');
        for (const sugg of suggestions) {
          if (sugg.textContent.includes(city)) {
            sugg.click();
            return true;
          }
        }
        return false;
      }, CONFIG.city);

      if (shanghaiClicked) {
        console.log(` Clicked suggestion containing ${CONFIG.city}`);
        submitted = true;
        await wait(4000);
      } else {
        console.log(` No ${CONFIG.city} suggestion found, trying all suggestions...`);
        const suggestions = await page.$$('.van-cell, .suggestion-item, [class*="suggest"]');
        if (suggestions.length > 0) {
          console.log(` Found ${suggestions.length} suggestions, clicking first...`);
          await suggestions[0].click();
          submitted = true;
          await wait(3000);
        }
      }

      // If no suggestions, try submit button
      if (!submitted) {
        for (const selector of submitSelectors) {
          try {
            const button = await page.$(selector);
            if (button) {
              console.log(` Clicking submit: ${selector}`);
              await button.click();
              submitted = true;
              await wait(3000);
              break;
            }
          } catch (e) {
            // Best effort: fall through to the next candidate button.
            console.log(` Submit ${selector} failed: ${e.message}`);
          }
        }
      }

      // If still not submitted, try pressing Enter
      if (!submitted) {
        console.log(' Pressing Enter...');
        await page.keyboard.press('Enter');
        await wait(3000);
      }
      await screenshot(page, 'after_search');
    } else {
      // Strategy 2: Look for city/location selector
      console.log('❌ No search input found');
      console.log('🔍 Looking for city selector...');
      // Find a visible element whose whole text is the city name and click it.
      const cityFound = await page.evaluate((city) => {
        const elements = Array.from(document.querySelectorAll('a, div, span'));
        const cityEl = elements.find(el =>
          el.textContent.trim() === city &&
          el.getBoundingClientRect().width > 0
        );
        if (cityEl) {
          cityEl.click();
          return true;
        }
        return false;
      }, CONFIG.city);

      if (cityFound) {
        console.log('✅ Clicked Shanghai');
        await wait(3000);
      } else {
        console.log('⚠️ Could not find city selector');
      }
    }

    // Current URL after navigation
    let currentUrl = page.url();
    console.log(`\n📍 Current URL: ${currentUrl}`);

    // If we're on /suggest page, try to find and click the city
    if (currentUrl.includes('/suggest')) {
      console.log('⚠️ On suggestions page, looking for Shanghai option...\n');
      const shanghaiFound = await page.evaluate((city) => {
        // Look for the city in hot recommendations or administrative areas
        const items = document.querySelectorAll('.city-hot-item, .city-hot-item2, .city-item, div[class*="item"]');
        for (const item of items) {
          if (item.textContent.trim().includes(city)) {
            item.click();
            return true;
          }
        }
        return false;
      }, CONFIG.city);

      if (shanghaiFound) {
        console.log(` ✅ Clicked ${CONFIG.city} from suggestions`);
        await wait(4000);
        currentUrl = page.url();
        console.log(` 📍 New URL: ${currentUrl}`);
      } else {
        console.log(`${CONFIG.city} not found in suggestions`);
        console.log(` 💡 Try searching for just the city name next time\n`);
      }
    }

    await screenshot(page, 'before_extraction');
    console.log('');

    // Extract listings
    console.log('📊 Extracting listings...\n');
    const allListings = [];
    const seenKeys = new Set();
    let noChangeCount = 0;

    // Scroll to load more listings (lazy loading)
    for (let i = 0; i < CONFIG.maxScrolls; i++) {
      console.log(`🔄 Scroll ${i + 1}/${CONFIG.maxScrolls}...`);

      // Extract whatever is currently rendered
      const listings = await page.evaluate(extractListingsInPage);

      // Keep only cards not seen before, in earlier scrolls or in this batch.
      const newListings = [];
      for (const listing of listings) {
        const key = listingDedupKey(listing);
        if (!seenKeys.has(key)) {
          seenKeys.add(key);
          newListings.push(listing);
        }
      }
      allListings.push(...newListings);
      console.log(` Found ${listings.length} on page, ${newListings.length} new, ${allListings.length} total`);

      // Stop early once scrolling no longer reveals anything new
      if (newListings.length === 0) {
        noChangeCount++;
        if (noChangeCount >= 3) {
          console.log(' No new listings for 3 scrolls, stopping...');
          break;
        }
      } else {
        noChangeCount = 0;
      }

      // Scroll down one viewport
      await page.evaluate(() => {
        window.scrollBy(0, window.innerHeight);
      });
      await wait(CONFIG.scrollDelay);
    }

    await screenshot(page, 'final');
    console.log(`\n✅ Total extracted: ${allListings.length} listings\n`);

    if (allListings.length === 0) {
      console.log('❌ No listings found!');
      console.log('💾 Saving page HTML for inspection...');
      const html = await page.content();
      fs.writeFileSync('./xiaozhu_interactive_page.html', html);
      console.log('\n📋 Page info:');
      const pageInfo = await page.evaluate(() => ({
        url: window.location.href,
        title: document.title,
        bodyText: document.body.textContent.substring(0, 500),
        elementCount: document.querySelectorAll('*').length
      }));
      console.log(` URL: ${pageInfo.url}`);
      console.log(` Title: ${pageInfo.title}`);
      console.log(` Elements: ${pageInfo.elementCount}`);
      console.log(` Body preview: ${pageInfo.bodyText.substring(0, 200)}...`);
    } else {
      // Process and filter listings
      const processed = processListings(allListings);

      // Save results
      fs.writeFileSync(CONFIG.outputFile, JSON.stringify(processed, null, 2));
      console.log(`💾 Results saved: ${CONFIG.outputFile}`);
      const markdown = generateMarkdown(processed);
      fs.writeFileSync(CONFIG.outputMarkdown, markdown);
      console.log(`📝 Markdown saved: ${CONFIG.outputMarkdown}`);

      // Print top results
      printTopResults(processed);
    }
  } catch (err) {
    console.error('❌ Error:', err.message);
    console.error(err.stack);
    if (page) {
      // The page may itself be the thing that broke; never let the diagnostic
      // screenshot mask the original error.
      try {
        await screenshot(page, 'error');
      } catch (shotErr) {
        console.error('⚠️ Could not capture error screenshot:', shotErr.message);
      }
    }
  } finally {
    if (CONFIG.headless) {
      await browser.close();
    } else {
      console.log('\n⏸ Browser kept open for inspection. Close manually when done.');
    }
  }
}
/**
 * Scores, filters and ranks scraped listings.
 *
 * Every listing with a positive `priceDaily` is annotated IN PLACE with
 * `priceTotal` (for CONFIG.days nights), `priceMonthly` (30 nights) and
 * `score`. Listings are then kept only if they mention both a kitchen and a
 * fridge and their total fits within CONFIG.dailyBudgetMax, sorted by score
 * (highest first) and truncated to CONFIG.topN entries.
 *
 * @param {Array<object>} listings - Raw listings as produced by the extractor.
 * @returns {Array<object>} The retained listing objects (same references as the input).
 */
function processListings(listings) {
  const nights = CONFIG.days;
  const idealTotal = CONFIG.dailyBudgetIdeal;
  const maxTotal = CONFIG.dailyBudgetMax;

  // Price contribution: reward for being under the ideal total, mild penalty
  // between ideal and max, heavy penalty above max.
  const priceAdjustment = (total) => {
    if (total <= idealTotal) {
      return (idealTotal - total) / 100;
    }
    if (total <= maxTotal) {
      return -((total - idealTotal) / 50);
    }
    return -100;
  };

  // Step 1: keep only listings with a usable price.
  const priced = [];
  listings.forEach((entry) => {
    if (entry.priceDaily && entry.priceDaily > 0) {
      priced.push(entry);
    }
  });

  // Step 2: annotate each priced listing with totals and a score.
  priced.forEach((entry) => {
    entry.priceTotal = entry.priceDaily * nights;
    entry.priceMonthly = Math.ceil(entry.priceDaily * 30);

    const hasLocation = Boolean(entry.location);
    const hasTitle = Boolean(entry.title);
    // [condition, points] pairs, applied in this exact order.
    const bonuses = [
      [entry.hasKitchen, 20],
      [entry.hasFridge, 15],
      [entry.hasWashingMachine, 10],
      [entry.hasMetro, 15],
      [hasLocation && entry.location.includes(CONFIG.district), 20],
      [hasLocation && entry.location.includes(CONFIG.keyword), 10],
      [hasTitle && entry.title.includes(CONFIG.keyword), 10]
    ];

    let score = 0;
    score += priceAdjustment(entry.priceTotal);
    for (const [applies, points] of bonuses) {
      if (applies) {
        score += points;
      }
    }
    entry.score = Math.round(score * 10) / 10;
  });

  // Step 3: required equipment and budget ceiling, then rank and truncate.
  const eligible = priced.filter(
    (entry) => entry.hasKitchen && entry.hasFridge && entry.priceTotal <= maxTotal
  );
  eligible.sort((first, second) => second.score - first.score);
  return eligible.slice(0, CONFIG.topN);
}
/**
 * Renders processed listings as a Markdown report: a short header describing
 * the search (from CONFIG) followed by one table row per listing.
 *
 * Titles are collapsed to a single line, truncated to 40 characters and have
 * `|` escaped, so that scraped text cannot break the table layout.
 *
 * @param {Array<object>} listings - Listings already annotated with
 *   `priceTotal` and `score`.
 * @returns {string} The Markdown document, ending with a newline.
 */
function generateMarkdown(listings) {
  const mark = (flag) => (flag ? '✓' : '✗');
  const titleCell = (title) =>
    (title || 'Untitled')
      .replace(/\s+/g, ' ')
      .trim()
      .substring(0, 40)
      .replace(/\|/g, '\\|');

  let md = '# Xiaozhu Search Results - Interactive Scraper\n\n';
  md += `**Date:** ${new Date().toLocaleDateString()}\n`;
  md += `**Location:** ${CONFIG.city} ${CONFIG.district}\n`;
  // Explicit separator: without it the two dates print as one run of digits.
  md += `**Dates:** ${CONFIG.checkIn} → ${CONFIG.checkOut} (${CONFIG.days} days)\n`;
  md += `**Budget:** ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB/month\n\n`;
  md += '| # | Title | Daily | Total | Kitchen | Fridge | Washer | Metro | Score | Link |\n';
  md += '|---|-------|-------|-------|---------|--------|--------|-------|-------|------|\n';

  listings.forEach((l, i) => {
    const cells = [
      i + 1,
      titleCell(l.title),
      `¥${l.priceDaily}`,
      `¥${l.priceTotal}`,
      mark(l.hasKitchen),
      mark(l.hasFridge),
      mark(l.hasWashingMachine),
      mark(l.hasMetro),
      l.score,
      l.url ? `[View](${l.url})` : '-'
    ];
    md += `| ${cells.join(' | ')} |\n`;
  });

  return md;
}
/**
 * Prints a console summary of the first five listings: title, pricing
 * breakdown, optional location, equipment flags, score and optional link,
 * each listing followed by an empty line.
 *
 * @param {Array<object>} listings - Listings already annotated with
 *   `priceTotal`, `priceMonthly` and `score`.
 * @returns {void}
 */
function printTopResults(listings) {
  const mark = (flag) => (flag ? '✓' : '✗');

  console.log('\n🏆 TOP RESULTS:\n');

  const podium = listings.slice(0, 5);
  podium.forEach((entry, position) => {
    const lines = [];
    lines.push(`${position + 1}. ${entry.title || 'Untitled'}`);
    lines.push(` 💰 ¥${entry.priceDaily}/day × ${CONFIG.days} days = ¥${entry.priceTotal} total (~¥${entry.priceMonthly}/month)`);
    if (entry.location) {
      lines.push(` 📍 ${entry.location}`);
    }
    lines.push(` ✓ Kitchen: ${mark(entry.hasKitchen)} | Fridge: ${mark(entry.hasFridge)} | Washer: ${mark(entry.hasWashingMachine)} | Metro: ${mark(entry.hasMetro)}`);
    lines.push(` ⭐ Score: ${entry.score}`);
    if (entry.url) {
      lines.push(` 🔗 ${entry.url}`);
    }
    lines.push('');

    lines.forEach((line) => {
      console.log(line);
    });
  });
}
// Entry point: start the scrape as soon as the script is loaded. Any rejection
// that escapes scrapXiaozhu is printed to stderr.
scrapXiaozhu().catch((failure) => {
  console.error(failure);
});