/*
 * Commit note (apparently captured from the repository viewer; not part of the program):
 *
 * Major updates:
 * - December 2025 crisis documentation and separation agreement
 * - Daily check system v2 with multiple card categories
 * - Xiaozhu rental search tools and results
 * - Exit plan documentation
 * - Message drafts for family communication
 * - Confluent moved to CONSTANT
 * - Updated profiles and promises
 *
 * 🤖 Generated with [Claude Code](https://claude.com/claude-code)
 * Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
 *
 * File: 667 lines, 23 KiB, JavaScript
 */
const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Xiaozhu Fixed Scraper - with geolocation override and smart navigation.
 * Fixes: geolocation set to Shanghai, better suggestion detection, city verification.
 */

/**
 * Central configuration for one scraper run: search target, stay dates,
 * budget, wanted amenities, scroll tuning, output paths and debug switches.
 */
const CONFIG = {
  // Location - search specifically for the Jiaotong University area
  city: '上海',
  searchQuery: '交通大学', // Just the university name for better suggestions
  cityEnglish: 'shanghai',
  district: '徐汇区',
  keyword: '交通大学',

  // Shanghai Xujiahui coordinates
  latitude: 31.1880,
  longitude: 121.4367,

  // Dates (ISO yyyy-mm-dd)
  checkIn: '2025-12-24',
  checkOut: '2026-01-22',

  /**
   * Number of nights between checkIn and checkOut. Derived from the two
   * dates so it cannot drift when one of them is edited. Date-only ISO
   * strings parse as UTC midnight, so the difference is a whole number of
   * days.
   */
  get days() {
    const msPerDay = 24 * 60 * 60 * 1000;
    return Math.round((Date.parse(this.checkOut) - Date.parse(this.checkIn)) / msPerDay);
  },

  // Budget, in RMB per 30-day month
  budgetIdeal: 4000,
  budgetMax: 5000,

  /**
   * Ideal budget for the WHOLE stay: the monthly figure prorated to `days`.
   * Despite the name this is not a per-day amount; it is compared against
   * a listing's total price for the stay.
   */
  get dailyBudgetIdeal() {
    return Math.ceil(this.budgetIdeal / 30 * this.days);
  },

  /** Maximum budget for the WHOLE stay (same proration as dailyBudgetIdeal). */
  get dailyBudgetMax() {
    return Math.ceil(this.budgetMax / 30 * this.days);
  },

  // Equipment
  required: ['厨房', '冰箱'],
  bonus: ['洗衣机', '地铁'],

  // Scraping - more aggressive to load everything
  maxScrolls: 50,
  scrollDelay: 3500, // Longer wait for lazy load
  interactionDelay: 1000,
  noChangeThreshold: 7, // Wait 7 scrolls without change before stopping

  // Output
  outputFile: './xiaozhu_results.json',
  outputMarkdown: './xiaozhu_results.md',
  topN: 20,

  // Debug
  headless: true,
  screenshots: true,
};

// Start-up banner: echo the effective search parameters before any browser work.
console.log('🚀 Xiaozhu FIXED Scraper - Jiaotong University Focus');
console.log(`📍 Search: ${CONFIG.searchQuery}`);
console.log(`🎯 Target: ${CONFIG.keyword} (${CONFIG.district})`);
console.log(`🌍 Geolocation: ${CONFIG.latitude}, ${CONFIG.longitude}`);
console.log(`📅 Dates: ${CONFIG.checkIn} → ${CONFIG.checkOut} (${CONFIG.days} days)`);
console.log(`💰 Budget: ${CONFIG.budgetIdeal}-${CONFIG.budgetMax} RMB/month\n`);

/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function wait(ms) {
  // Returning the Promise directly; an `async` wrapper would add nothing.
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Save a full-page debug screenshot named `./xiaozhu_<name>_<timestamp>.png`.
 *
 * Screenshots are a debugging aid only, so a failure to capture one is
 * logged and swallowed instead of aborting the scrape.
 *
 * @param {object} page - Page object exposing `screenshot({ path, fullPage })`.
 * @param {string} name - Label embedded in the file name.
 * @param {boolean} [enabled=CONFIG.screenshots] - When false, do nothing.
 * @returns {Promise<void>}
 */
async function screenshot(page, name, enabled = CONFIG.screenshots) {
  if (!enabled) {
    return;
  }

  const filename = `./xiaozhu_${name}_${Date.now()}.png`;
  try {
    await page.screenshot({ path: filename, fullPage: true });
    console.log(`📸 ${filename}`);
  } catch (err) {
    console.log(`⚠️ Screenshot "${name}" failed: ${err.message}`);
  }
}

/**
 * Load previously saved browser cookies from a JSON file.
 *
 * A missing file is the normal "no saved session" case and stays silent.
 * Any other read error, malformed JSON, or a JSON value that is not an
 * array is reported, because it means a saved session is being ignored.
 *
 * @param {string} [cookiePath='./xiaozhu_cookies.json'] - File to read.
 * @returns {Promise<Array<object>|null>} Parsed cookie array, or null when
 *   no usable cookies are available.
 */
async function loadCookies(cookiePath = './xiaozhu_cookies.json') {
  let raw;
  try {
    raw = await fs.promises.readFile(cookiePath, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.log(`⚠️ Could not read ${cookiePath}: ${err.message}`);
    }
    return null;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    console.log(`⚠️ Ignoring ${cookiePath}: invalid JSON (${err.message})`);
    return null;
  }

  if (!Array.isArray(parsed)) {
    console.log(`⚠️ Ignoring ${cookiePath}: expected a JSON array of cookies`);
    return null;
  }
  return parsed;
}

/**
 * Decide whether two scraped listings describe the same listing.
 *
 * Two listings match when they share a URL, or when their title and daily
 * price are identical. A field missing on both sides counts as identical,
 * so a listing scraped without URL or price is still recognised on the next
 * pass instead of being appended again on every scroll.
 *
 * @param {object} a - Scraped listing.
 * @param {object} b - Scraped listing.
 * @returns {boolean}
 */
function isSameListing(a, b) {
  if (a.url && b.url && a.url === b.url) {
    return true;
  }
  return a.title === b.title && a.priceDaily === b.priceDaily;
}

/**
 * Append the listings of `batch` that are not yet in `allListings`
 * (also de-duplicating within the batch itself).
 *
 * @param {object[]} allListings - Accumulator, extended in place.
 * @param {object[]} batch - Listings extracted in the current pass.
 * @returns {number} How many listings were appended.
 */
function mergeNewListings(allListings, batch) {
  let added = 0;
  for (const candidate of batch) {
    if (!allListings.some((known) => isSameListing(candidate, known))) {
      allListings.push(candidate);
      added += 1;
    }
  }
  return added;
}

/**
 * Extract listing cards from the current document.
 *
 * Runs INSIDE the browser via page.evaluate, so it must stay self-contained:
 * it cannot reference anything from the Node side. Its console.log calls go
 * to the page's console, not to the terminal.
 *
 * @returns {object[]} One plain object per card that has a title or a price.
 */
function extractListingsInPage() {
  const results = [];
  const selectors = [
    '.list-item', // PRIMARY - Found in HTML analysis
    '.house-item', '.room-item', '.van-card',
    '[class*="list-item"]', '[class*="house"]', '[class*="room"]',
  ];

  // Use the first selector that yields a plausible number of cards; 200 or
  // more hits are taken to mean the selector is too broad.
  let items = [];
  for (const sel of selectors) {
    const elements = document.querySelectorAll(sel);
    if (elements.length > 0 && elements.length < 200) {
      items = Array.from(elements);
      console.log(`Using selector: ${sel} (${elements.length} items)`);
      break;
    }
  }

  // Every matched card is read. A fixed per-pass cap would hide the cards
  // that lazy loading appends further down the list.
  items.forEach((item, idx) => {
    const listing = { index: idx + 1 };

    // Debug: log all attributes of first item
    if (idx === 0) {
      console.log('DEBUG First item attributes:', {
        className: item.className,
        id: item.id,
        attributes: Array.from(item.attributes || []).map((a) => `${a.name}=${a.value}`),
        innerHTML: item.innerHTML.substring(0, 200),
      });
    }

    // Title - try specific Xiaozhu classes first
    const titleEl = item.querySelector('.list-title, h2, h3, h4, .title, .name, [class*="title"]');
    if (titleEl) listing.title = titleEl.textContent.trim();

    // Price - try specific Xiaozhu classes first
    const priceEl = item.querySelector('.list-price, .price-left, .price, [class*="price"]');
    if (priceEl) {
      // Drop thousands separators so "1,200" is read as 1200, not 1.
      const match = priceEl.textContent.replace(/,/g, '').match(/(\d+)/);
      if (match) {
        listing.priceDaily = Number.parseInt(match[1], 10);
        listing.priceText = priceEl.textContent.trim();
      }
    }

    if (!listing.priceDaily) {
      const match = item.textContent.match(/[¥￥]?\s*(\d+)\s*[元\/晚]/);
      if (match) listing.priceDaily = Number.parseInt(match[1], 10);
    }

    // Location - extract from content, falling back to the title
    const contentEl = item.querySelector('.list-content, .content, .location, .address');
    if (contentEl) listing.location = contentEl.textContent.trim();
    if (!listing.location && listing.title) {
      listing.location = listing.title;
    }

    // URL - try multiple approaches
    // 1. Direct link
    const linkEl = item.querySelector('a');
    if (linkEl && linkEl.href && !linkEl.href.startsWith('javascript:')) {
      listing.url = linkEl.href;
    }

    // 2. Data attributes (listing ID)
    if (!listing.url) {
      const dataId = item.getAttribute('data-id') ||
        item.getAttribute('data-house-id') ||
        item.getAttribute('data-fid');
      if (dataId) {
        listing.url = `https://minsu.xiaozhu.com/house/${dataId}`;
        listing.houseId = dataId;
      }
    }

    // 3. Look for an ID in onclick or similar attributes
    if (!listing.url) {
      const onclick = item.getAttribute('onclick') || item.getAttribute('@click');
      if (onclick) {
        const idMatch = onclick.match(/\d{6,}/);
        if (idMatch) {
          listing.url = `https://minsu.xiaozhu.com/house/${idMatch[0]}`;
          listing.houseId = idMatch[0];
        }
      }
    }

    // 4. Check child elements for router-link
    if (!listing.url) {
      const routerLink = item.querySelector('[to], [router-link]');
      if (routerLink) {
        const to = routerLink.getAttribute('to') || routerLink.getAttribute('router-link');
        if (to) {
          listing.url = `https://minsu.xiaozhu.com${to}`;
        }
      }
    }

    // Image
    const imgEl = item.querySelector('img');
    if (imgEl) listing.image = imgEl.src;

    // Equipment (check Chinese text, not lowercased)
    const fullText = item.textContent;
    listing.hasKitchen = fullText.includes('厨房') || fullText.includes('可做饭') || fullText.includes('可烧饭');
    listing.hasFridge = fullText.includes('冰箱') || fullText.includes('冷藏');
    listing.hasWashingMachine = fullText.includes('洗衣机');
    listing.hasMetro = fullText.includes('地铁') || fullText.includes('站');

    if (listing.title || listing.priceDaily) {
      results.push(listing);
    }
  });

  return results;
}

/**
 * Apply mobile user agent, Shanghai geolocation and saved cookies to a page.
 *
 * @param {object} browser - Puppeteer browser.
 * @param {object} page - Puppeteer page to configure.
 * @param {Array<object>|null} cookies - Saved cookies, if any.
 * @returns {Promise<void>}
 */
async function preparePage(browser, page, cookies) {
  // Mobile user agent
  await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1');

  // ===== FIX 1: Override geolocation to Shanghai =====
  console.log('🌍 Setting geolocation to Shanghai Xujiahui...');
  const context = browser.defaultBrowserContext();
  await context.overridePermissions('https://minsu.xiaozhu.com', ['geolocation']);
  await page.setGeolocation({
    latitude: CONFIG.latitude,
    longitude: CONFIG.longitude,
    accuracy: 100,
  });
  console.log(`✅ Geolocation set to ${CONFIG.latitude}, ${CONFIG.longitude}\n`);

  // Load cookies (best effort: a bad cookie must not stop the run)
  if (cookies && cookies.length > 0) {
    try {
      await page.setCookie(...cookies);
      console.log(`🍪 Loaded ${cookies.length} cookies\n`);
    } catch (err) {
      console.log('⚠️ Cookie error:', err.message);
    }
  }
}

/**
 * Strategy 1: try a few guessed city URLs and stop at the first one that
 * does not look like a "not found" page.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<string|null>} The URL that worked, or null.
 */
async function tryDirectUrls(page) {
  // ===== FIX 2: Try direct URL first =====
  console.log('🔍 Strategy 1: Trying direct Shanghai URL...');

  const directUrls = [
    `https://minsu.xiaozhu.com/${CONFIG.cityEnglish}`,
    `https://minsu.xiaozhu.com/city/${CONFIG.cityEnglish}`,
    `https://minsu.xiaozhu.com/search/${CONFIG.cityEnglish}`,
    `https://minsu.xiaozhu.com/${CONFIG.cityEnglish}/${encodeURIComponent(CONFIG.district)}`,
  ];

  for (const url of directUrls) {
    try {
      console.log(` Trying: ${url}`);
      const response = await page.goto(url, { waitUntil: 'networkidle2', timeout: 15000 });
      await wait(2000);

      // page.goto may resolve with null; then only the page text can tell.
      const httpError = response !== null && response.status() >= 400;

      // Only look at rendered text: textContent would also scan inline
      // scripts, where a stray "404" is easy to hit.
      const looksNotFound = await page.evaluate(() => {
        const visible = document.body ? document.body.innerText : '';
        return visible.includes('404') || visible.includes('找不到');
      });

      if (!httpError && !looksNotFound) {
        console.log(` ✅ Success!`);
        await screenshot(page, 'direct_url_success');
        return url;
      }
      console.log(` ❌ 404`);
    } catch (e) {
      console.log(` ❌ Failed: ${e.message}`);
    }
  }

  return null;
}

/**
 * Strategy 2: open the homepage, type the search query and pick the most
 * specific matching suggestion (or press Enter when none matches).
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function searchFromHomepage(page) {
  console.log('\n🔍 Strategy 2: Homepage search with geolocation...');

  await page.goto('https://minsu.xiaozhu.com/', {
    waitUntil: 'networkidle2',
    timeout: 30000,
  });
  await wait(3000);
  await screenshot(page, 'homepage');

  // Check if geolocation worked and we see Shanghai content
  const cityDetected = await page.evaluate(() => {
    const bodyText = document.body.textContent;
    if (bodyText.includes('上海') || bodyText.includes('Shanghai')) {
      return '上海';
    }
    if (bodyText.includes('北京') || bodyText.includes('Beijing')) {
      return '北京';
    }
    return 'unknown';
  });
  console.log(` Detected city: ${cityDetected}`);

  // Find search input
  console.log('\n⌨️ Using search...');
  const searchSelectors = [
    'input[placeholder*="目的地"]',
    'input[placeholder*="搜索"]',
    'input[type="search"]',
    'input[type="text"]',
  ];

  let searchInput = null;
  for (const selector of searchSelectors) {
    searchInput = await page.$(selector);
    if (searchInput) {
      console.log(` Found: ${selector}`);
      break;
    }
  }

  if (!searchInput) {
    console.log(' ⚠️ No search input found');
    return;
  }

  await searchInput.click();
  await wait(500);

  // ===== FIX 3: Clear any pre-filled text first =====
  // Triple-click selects the field's content on every platform; a
  // Control+A chord does not select all on macOS.
  await searchInput.click({ clickCount: 3 });
  await page.keyboard.press('Backspace');

  // Type specific search query for Jiaotong University
  await searchInput.type(CONFIG.searchQuery, { delay: 150 });
  await wait(2000); // Wait for suggestions
  await screenshot(page, 'search_typed');

  // ===== FIX 4: Smart suggestion detection =====
  console.log(`\n👆 Looking for suggestions matching "${CONFIG.searchQuery}"...`);

  const clickedText = await page.evaluate((searchQuery, keyword) => {
    const candidates = [];

    for (const el of document.querySelectorAll('div, li, a, span')) {
      const rect = el.getBoundingClientRect();
      // Must be visible
      if (rect.width <= 0 || rect.height <= 0) continue;
      // Skip wrappers around the search box itself (page/search containers).
      if (el.querySelector('input, textarea')) continue;

      const text = el.textContent.trim();
      let score = 0;
      if (text.includes(keyword)) {
        score = 100; // Prioritize keyword match
      } else if (text.includes(searchQuery)) {
        score = 80; // Or search query match
      } else if (text.includes('上海') && text.length < 15) {
        score = 30; // Or contains Shanghai
      }
      if (score > 0) candidates.push({ el, text, score });
    }

    if (candidates.length === 0) {
      return null;
    }

    // Best score first; among equals prefer the shortest text, i.e. the
    // innermost element. Its click bubbles to the suggestion's handler,
    // whereas clicking an outer container that merely contains the
    // keyword does nothing useful.
    candidates.sort((a, b) => b.score - a.score || a.text.length - b.text.length);
    candidates[0].el.click();
    return candidates[0].text;
  }, CONFIG.searchQuery, CONFIG.keyword);

  if (clickedText !== null) {
    console.log(` ✅ Clicked matching suggestion: "${clickedText}"`);
    await wait(4000);
    await screenshot(page, 'after_suggestion');
  } else {
    console.log(' ⚠️ No matching suggestion, pressing Enter...');
    await page.keyboard.press('Enter');
    await wait(3000);
  }
}

/**
 * Verify the page shows Shanghai content; when it shows Beijing content
 * only, try to switch city by clicking a city label or submitting a search.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function ensureShanghai(page) {
  // ===== FIX 5: Verify we're on Shanghai, if not, fix it =====
  console.log(`\n📍 Current URL: ${page.url()}`);

  const cityCheck = await page.evaluate(() => {
    const text = document.body.textContent;
    return {
      hasShanghai: text.includes('上海') || text.includes('Shanghai'),
      hasBeijing: text.includes('北京') || text.includes('Beijing') ||
        text.includes('天安门') || text.includes('朝阳'),
    };
  });

  console.log(` Shanghai content: ${cityCheck.hasShanghai ? '✅' : '❌'}`);
  console.log(` Beijing content: ${cityCheck.hasBeijing ? '⚠️ YES' : '✅ No'}`);

  if (!cityCheck.hasBeijing || cityCheck.hasShanghai) {
    return;
  }

  console.log('\n🔧 Detected Beijing, attempting to switch to Shanghai...');

  const switched = await page.evaluate((city) => {
    // Look for a visible element whose whole text is the city name.
    const labels = Array.from(document.querySelectorAll('a, div, span, button')).filter((el) => {
      const text = el.textContent.trim();
      return (text === city || text === city + '市') && el.getBoundingClientRect().width > 0;
    });
    if (labels.length > 0) {
      // Click the innermost match so the event bubbles through every
      // wrapper that might carry the handler.
      labels.sort((a, b) => a.querySelectorAll('*').length - b.querySelectorAll('*').length);
      labels[0].click();
      return true;
    }

    // Otherwise search for the city in the first visible input.
    const inputs = document.querySelectorAll('input[type="text"], input[type="search"]');
    for (const input of inputs) {
      if (input.getBoundingClientRect().width > 0) {
        input.value = city;
        input.dispatchEvent(new Event('input', { bubbles: true }));

        // Try to submit
        const form = input.closest('form');
        if (form) {
          form.dispatchEvent(new Event('submit', { bubbles: true }));
          return true;
        }

        // Or press Enter
        input.dispatchEvent(new KeyboardEvent('keydown', {
          key: 'Enter',
          code: 'Enter',
          keyCode: 13,
          bubbles: true,
        }));
        return true;
      }
    }

    return false;
  }, CONFIG.city);

  if (switched) {
    console.log(' ✅ Triggered Shanghai switch');
    await wait(4000);
    await screenshot(page, 'after_switch');
    console.log(` 📍 New URL: ${page.url()}`);
  } else {
    console.log(' ❌ Could not find Shanghai option');
  }
}

/**
 * Scroll through the result list, extracting and de-duplicating listings
 * until nothing new appears for CONFIG.noChangeThreshold consecutive passes
 * or CONFIG.maxScrolls passes have run.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @returns {Promise<object[]>} All distinct listings found.
 */
async function collectListings(page) {
  console.log('\n📊 Extracting listings...\n');

  const allListings = [];
  let noChangeCount = 0;

  // FIRST: Scroll to bottom to trigger all lazy loading at once
  console.log('⏬ Scrolling to page bottom to trigger lazy load...');
  await page.evaluate(() => {
    window.scrollTo(0, document.body.scrollHeight);
  });
  await wait(5000); // Wait for initial load

  // SECOND: Progressive scrolling to load more
  console.log(`⏳ Progressive scrolling (max ${CONFIG.maxScrolls} scrolls, ${CONFIG.noChangeThreshold} patience)...\n`);

  for (let i = 0; i < CONFIG.maxScrolls; i++) {
    const progress = Math.round(((i + 1) / CONFIG.maxScrolls) * 100);
    console.log(`🔄 Scroll ${i + 1}/${CONFIG.maxScrolls} (${progress}%)...`);

    const listings = await page.evaluate(extractListingsInPage);
    const added = mergeNewListings(allListings, listings);
    console.log(` Found ${listings.length} items, ${added} new, ${allListings.length} total`);

    if (added === 0) {
      noChangeCount += 1;
      if (noChangeCount >= CONFIG.noChangeThreshold) {
        console.log(` No new listings for ${CONFIG.noChangeThreshold} scrolls, stopping...`);
        break;
      }
    } else {
      noChangeCount = 0;
    }

    // Scroll down
    await page.evaluate(() => window.scrollBy(0, window.innerHeight));

    // Wait for loading indicators to disappear
    try {
      await page.waitForFunction(() => {
        const loadingEls = document.querySelectorAll('.loading, .spinner, [class*="loading"]');
        return loadingEls.length === 0 || Array.from(loadingEls).every((el) => el.style.display === 'none');
      }, { timeout: 2000 });
    } catch (e) {
      // Timed out with an indicator still present; the fixed delay below
      // is the fallback, so carry on.
    }

    // Additional wait for lazy load
    await wait(CONFIG.scrollDelay);
  }

  return allListings;
}

/**
 * Dump the page HTML and print a short diagnosis when no listing was found.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function reportEmptyResult(page) {
  console.log('❌ No listings found!');
  const html = await page.content();
  fs.writeFileSync('./xiaozhu_fixed_page.html', html);
  console.log('💾 Saved HTML to xiaozhu_fixed_page.html');

  const pageInfo = await page.evaluate(() => ({
    url: window.location.href,
    title: document.title,
    hasShanghai: document.body.textContent.includes('上海'),
    hasBeijing: document.body.textContent.includes('北京'),
    bodyPreview: document.body.textContent.substring(0, 500),
  }));

  console.log('\n📋 Page diagnosis:');
  console.log(` URL: ${pageInfo.url}`);
  console.log(` Title: ${pageInfo.title}`);
  console.log(` Has Shanghai: ${pageInfo.hasShanghai ? '✅' : '❌'}`);
  console.log(` Has Beijing: ${pageInfo.hasBeijing ? '⚠️' : '✅'}`);
  console.log(` Preview: ${pageInfo.bodyPreview.substring(0, 200)}...`);
}

/**
 * Entry point: launch the browser, reach the Shanghai result list (direct
 * URL first, homepage search as fallback), scroll and extract listings,
 * then write the raw JSON, the ranked JSON and the Markdown report.
 *
 * Errors after launch are logged (with an 'error' screenshot when a page
 * exists) and reflected in the process exit code. The browser is closed
 * only in headless mode; a headed browser is left open.
 *
 * @returns {Promise<void>}
 */
async function scrapXiaozhu() {
  const cookies = await loadCookies();

  const browser = await puppeteer.launch({
    headless: CONFIG.headless ? "new" : false,
    defaultViewport: { width: 414, height: 896 },
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
    ],
  });

  // Everything after launch sits inside try/finally so a failure during
  // page setup cannot leak the browser process.
  let page = null;
  try {
    page = await browser.newPage();
    await preparePage(browser, page, cookies);

    const successUrl = await tryDirectUrls(page);
    // If direct URL failed, use homepage search
    if (!successUrl) {
      await searchFromHomepage(page);
    }

    await ensureShanghai(page);

    const allListings = await collectListings(page);
    await screenshot(page, 'final');
    console.log(`\n✅ Total extracted: ${allListings.length} listings\n`);

    // Save raw listings for debug
    fs.writeFileSync('./xiaozhu_raw_listings.json', JSON.stringify(allListings, null, 2));
    console.log('💾 Raw listings saved to xiaozhu_raw_listings.json\n');

    if (allListings.length === 0) {
      await reportEmptyResult(page);
      return;
    }

    const processed = processListings(allListings);
    fs.writeFileSync(CONFIG.outputFile, JSON.stringify(processed, null, 2));
    console.log(`💾 ${CONFIG.outputFile}`);

    const markdown = generateMarkdown(processed);
    fs.writeFileSync(CONFIG.outputMarkdown, markdown);
    console.log(`📝 ${CONFIG.outputMarkdown}`);

    printTopResults(processed);
  } catch (err) {
    console.error('❌ Error:', err.message);
    process.exitCode = 1;
    if (page) {
      try {
        await screenshot(page, 'error');
      } catch (shotErr) {
        console.error('⚠️ Could not capture error screenshot:', shotErr.message);
      }
    }
  } finally {
    if (CONFIG.headless) {
      await browser.close();
    }
  }
}

/**
 * Price, score, filter and rank scraped listings.
 *
 * Steps: drop listings without a positive daily price; compute the total
 * for the stay and a 30-day price; score each listing (budget fit, detected
 * amenities, district/keyword hits); drop listings more than 20% above the
 * maximum stay budget; sort by score descending; keep the top `topN`.
 *
 * The input objects are not modified: each result is a copy with
 * `priceTotal`, `priceMonthly` and `score` added.
 *
 * @param {object[]} listings - Raw scraped listings.
 * @param {object} [config=CONFIG] - Supplies `days`, `dailyBudgetIdeal`,
 *   `dailyBudgetMax` (both are budgets for the whole stay), `district`,
 *   `keyword` and `topN`.
 * @returns {object[]} Ranked listings, best first.
 */
function processListings(listings, config = CONFIG) {
  const { days, district, keyword, topN } = config;
  const stayBudgetIdeal = config.dailyBudgetIdeal;
  const stayBudgetMax = config.dailyBudgetMax;

  const scoreListing = (l, priceTotal) => {
    let score = 0;

    // Budget fit: reward savings under the ideal budget, penalise the
    // overshoot up to the maximum, and apply a flat penalty beyond it.
    if (priceTotal <= stayBudgetIdeal) {
      score += (stayBudgetIdeal - priceTotal) / 100;
    } else if (priceTotal <= stayBudgetMax) {
      score -= (priceTotal - stayBudgetIdeal) / 50;
    } else {
      score -= 100;
    }

    if (l.hasKitchen) score += 20;
    if (l.hasFridge) score += 15;
    if (l.hasWashingMachine) score += 10;
    if (l.hasMetro) score += 15;

    if (l.location) {
      if (l.location.includes(district)) score += 20;
      if (l.location.includes(keyword)) score += 10;
    }
    if (l.title && l.title.includes(keyword)) score += 10;

    // One decimal place is enough for ranking output.
    return Math.round(score * 10) / 10;
  };

  return listings
    .filter((l) => l.priceDaily > 0)
    .map((l) => {
      const priceTotal = l.priceDaily * days;
      const priceMonthly = Math.ceil(l.priceDaily * 30);
      return { ...l, priceTotal, priceMonthly, score: scoreListing(l, priceTotal) };
    })
    // Kitchen/fridge are deliberately not required here: they may be shown
    // as icons and so be missing from the scraped text.
    .filter((l) => l.priceTotal <= stayBudgetMax * 1.2) // Allow 20% over budget
    .sort((a, b) => b.score - a.score)
    .slice(0, topN);
}

/**
 * Render ranked listings as a Markdown report: a short header followed by
 * one table row per listing.
 *
 * Titles come from scraped pages, so whitespace runs (including newlines)
 * are collapsed and `|` is escaped; otherwise a single title could break
 * the table layout. Titles are cut to 40 code points.
 *
 * @param {object[]} listings - Listings as produced by processListings.
 * @param {object} [config=CONFIG] - Supplies `city`, `district`, `checkIn`,
 *   `checkOut` and `days` for the header.
 * @returns {string} Markdown document ending with a newline.
 */
function generateMarkdown(listings, config = CONFIG) {
  const mark = (flag) => (flag ? '✓' : '✗');

  const titleCell = (title) => {
    const flat = String(title || 'Untitled').replace(/\s+/g, ' ').trim();
    // Truncate by code point so a surrogate pair is never split.
    const short = [...flat].slice(0, 40).join('');
    return short.replace(/\|/g, '\\|');
  };

  // A raw pipe inside the URL would also end the cell early.
  const linkCell = (url) => (url ? `[View](${String(url).replace(/\|/g, '%7C')})` : '-');

  const rows = listings.map((l, i) => {
    const cells = [
      i + 1,
      titleCell(l.title),
      `¥${l.priceDaily}`,
      `¥${l.priceTotal}`,
      mark(l.hasKitchen),
      mark(l.hasFridge),
      mark(l.hasWashingMachine),
      mark(l.hasMetro),
      l.score,
      linkCell(l.url),
    ];
    return `| ${cells.join(' | ')} |`;
  });

  return [
    '# Xiaozhu Results - FIXED Scraper',
    '',
    `**Date:** ${new Date().toLocaleDateString()}`,
    `**Location:** ${config.city} ${config.district}`,
    `**Dates:** ${config.checkIn} → ${config.checkOut} (${config.days} days)`,
    '',
    '| # | Title | Daily | Total | Kitchen | Fridge | Washer | Metro | Score | Link |',
    '|---|-------|-------|-------|---------|--------|--------|-------|-------|------|',
    ...rows,
    '', // trailing newline
  ].join('\n');
}

/**
 * Print the five best listings to the console: title, price for the stay,
 * location (when known), detected amenities, score and link (when known).
 *
 * @param {object[]} listings - Ranked listings, best first.
 * @param {object} [config=CONFIG] - Supplies `days` for the price line.
 * @returns {void}
 */
function printTopResults(listings, config = CONFIG) {
  const mark = (flag) => (flag ? '✓' : '✗');

  console.log('\n🏆 TOP RESULTS:\n');

  listings.slice(0, 5).forEach((l, i) => {
    console.log(`${i + 1}. ${l.title || 'Untitled'}`);
    console.log(` 💰 ¥${l.priceDaily}/day × ${config.days} days = ¥${l.priceTotal}`);
    if (l.location) console.log(` 📍 ${l.location}`);
    console.log(` ✓ Kitchen: ${mark(l.hasKitchen)} | Fridge: ${mark(l.hasFridge)} | Washer: ${mark(l.hasWashingMachine)} | Metro: ${mark(l.hasMetro)}`);
    console.log(` ⭐ ${l.score}`);
    if (l.url) console.log(` 🔗 ${l.url}`);
    console.log('');
  });
}

// Run the scraper. A rejection that escapes scrapXiaozhu (for example a
// failed browser launch) is printed and turned into a non-zero exit code.
scrapXiaozhu().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});