personal-hub/tools/xiaozhu_url_extractor.js
StillHammer 3c8162c990 Sync couple_matters: December crisis, separation agreement, daily check v2, xiaozhu search
Major updates:
- December 2025 crisis documentation and separation agreement
- Daily check system v2 with multiple card categories
- Xiaozhu rental search tools and results
- Exit plan documentation
- Message drafts for family communication
- Confluent moved to CONSTANT
- Updated profiles and promises

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-23 07:04:02 +08:00

224 lines
7.2 KiB
JavaScript

const puppeteer = require('puppeteer');
const fs = require('fs');
/**
* Xiaozhu URL Extractor - Click listings to get real URLs
* Workaround for Vue.js router-link navigation
*/
// Tunable inputs for one extraction run.
const CONFIG = {
  // Keyword typed into the site's search box.
  searchQuery: '交通大学',
  // Coordinates handed to the page through the geolocation override.
  latitude: 31.1880,
  longitude: 121.4367,
  // Upper bound on how many listings get clicked through.
  maxListings: 10,
};

// Startup banner, one console line per entry.
const BANNER_LINES = [
  '🔗 Xiaozhu URL Extractor',
  '🎯 Extracting real URLs by simulating clicks...\n',
];
for (const bannerLine of BANNER_LINES) {
  console.log(bannerLine);
}
/**
 * Resolve after `ms` milliseconds. All fixed "let the page settle" pauses go
 * through this helper.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Open a tab with an iPhone user agent and the CONFIG coordinates as its
 * geolocation.
 *
 * @param {object} browser - Browser returned by `puppeteer.launch`.
 * @returns {Promise<object>} The configured page.
 */
async function openMobilePage(browser) {
  const page = await browser.newPage();
  await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15');
  const context = browser.defaultBrowserContext();
  await context.overridePermissions('https://minsu.xiaozhu.com', ['geolocation']);
  await page.setGeolocation({ latitude: CONFIG.latitude, longitude: CONFIG.longitude, accuracy: 100 });
  return page;
}

/**
 * Click the most specific visible element whose text contains `keyword`.
 *
 * Only innermost matches are considered: a click on the innermost element
 * bubbles to every ancestor, whereas picking the first match in document
 * order would select the outermost container whenever there is no exact match.
 * Exact text matches win, then shorter text.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} keyword - Text the suggestion must contain.
 * @returns {Promise<string|null>} Text of the clicked element, or null when
 *   nothing matched.
 */
async function clickBestSuggestion(page, keyword) {
  return page.evaluate((kw) => {
    const visibleMatches = [...document.querySelectorAll('div, li, a, span')]
      .map((el) => ({ el, text: el.textContent.trim() }))
      .filter(({ el, text }) => {
        if (!text.includes(kw)) return false;
        const rect = el.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      });

    // Drop every match that merely wraps another match.
    const innermost = visibleMatches.filter(
      ({ el }) => !visibleMatches.some((other) => other.el !== el && el.contains(other.el)),
    );

    innermost.sort((a, b) => {
      const exactA = a.text === kw ? 0 : 1;
      const exactB = b.text === kw ? 0 : 1;
      return exactA - exactB || a.text.length - b.text.length;
    });

    if (innermost.length === 0) return null;
    innermost[0].el.click();
    // Returned instead of logged: console output here lands in the page, not in Node.
    return innermost[0].text;
  }, keyword);
}

/**
 * Type `query` into the first text input and submit it, either by clicking a
 * matching suggestion or by pressing Enter. Does nothing beyond a warning when
 * the page has no text input.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} query - Search keyword.
 * @returns {Promise<void>}
 */
async function submitSearch(page, query) {
  const searchInput = await page.$('input[type="text"]');
  if (!searchInput) {
    console.log('⚠️ No search input found, continuing without searching');
    return;
  }

  console.log('⌨️ Found search input, typing...');
  await searchInput.click();
  await sleep(500);

  // Clear any pre-filled text
  await page.keyboard.down('Control');
  await page.keyboard.press('A');
  await page.keyboard.up('Control');
  await page.keyboard.press('Backspace');

  await searchInput.type(query, { delay: 150 });
  console.log(` Typed: ${query}`);
  await sleep(2000);
  await page.screenshot({ path: './xiaozhu_url_extract_typed.png' });

  const clickedText = await clickBestSuggestion(page, query);
  if (clickedText !== null) {
    console.log(`✅ Clicked suggestion: "${clickedText}"\n`);
    await sleep(4000);
  } else {
    console.log('⚠️ No suggestion, pressing Enter\n');
    await page.keyboard.press('Enter');
    await sleep(3000);
  }
}

/**
 * Find the first selector that matches listing cards on the current page.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<{selector: string, count: number}|null>} The matching
 *   selector and how many elements it matched, or null when none matched.
 */
async function locateListings(page) {
  const primaryCount = (await page.$$('.list-item')).length;
  console.log(`📊 Found ${primaryCount} listings\n`);
  if (primaryCount > 0) {
    return { selector: '.list-item', count: primaryCount };
  }

  console.log('⚠️ No .list-item found, trying alternative selectors...');
  const altSelectors = ['[class*="list-item"]', '[class*="house"]', '[class*="room"]'];
  for (const selector of altSelectors) {
    const count = (await page.$$(selector)).length;
    if (count > 0) {
      console.log(` ✅ Found ${count} with selector: ${selector}\n`);
      return { selector, count };
    }
  }
  return null;
}

/**
 * Read title, price and first image URL of the `index`-th element matching
 * `selector`.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} selector - Selector that identifies listing cards.
 * @param {number} index - Zero-based position of the card.
 * @returns {Promise<{title: string, price: string, image: (string|null)}|null>}
 *   Null when no element exists at that index.
 */
async function readListingInfo(page, selector, index) {
  return page.evaluate((sel, idx) => {
    const item = document.querySelectorAll(sel)[idx];
    if (!item) return null;
    const titleEl = item.querySelector('.list-title, [class*="title"]');
    const priceEl = item.querySelector('.list-price, [class*="price"]');
    const imgEl = item.querySelector('img');
    return {
      title: titleEl?.textContent.trim() || 'No title',
      price: priceEl?.textContent.trim() || 'No price',
      image: imgEl?.src || null,
    };
  }, selector, index);
}

/**
 * Click up to CONFIG.maxListings cards one by one, record the URL each click
 * leads to, and return to the list after each one.
 *
 * The same `selector` that located the cards is used for every re-query, so
 * indices stay aligned with the initial count. The page is only sent back when
 * its URL actually changed, so a click that does not navigate cannot push the
 * browser off the list page.
 *
 * @param {object} page - Puppeteer page showing the listing results.
 * @param {string} selector - Selector that matched the listing cards.
 * @param {number} count - Number of cards matched initially.
 * @returns {Promise<Array<object>>} Listing info objects extended with `url`.
 */
async function collectListingUrls(page, selector, count) {
  const results = [];
  const total = Math.min(count, CONFIG.maxListings);

  for (let i = 0; i < total; i++) {
    console.log(`🔍 ${i + 1}/${total}...`);
    const urlBeforeClick = page.url();
    try {
      // Re-query elements each time (they become stale after navigation)
      const currentElements = await page.$$(selector);
      if (i >= currentElements.length) {
        console.log(' ⚠️ Element no longer available');
        continue;
      }

      const listingInfo = await readListingInfo(page, selector, i);
      if (!listingInfo) {
        console.log(' ⚠️ Could not extract info');
        continue;
      }
      console.log(` 📝 ${listingInfo.title.substring(0, 50)}...`);
      console.log(` 💰 ${listingInfo.price}`);

      await currentElements[i].click();
      console.log(` 👆 Clicked`);
      // Fixed wait for the client-side router to finish navigating.
      await sleep(4000);

      const url = page.url();
      if (url === urlBeforeClick) {
        // Nothing to record and nothing to go back from.
        console.log(' ⚠️ Click did not change the URL, skipping');
        continue;
      }
      console.log(` 🔗 ${url}`);
      results.push({ ...listingInfo, url });

      await page.goBack({ waitUntil: 'networkidle2', timeout: 10000 });
      console.log(` ⬅️ Back to list`);
      await sleep(2000);
      console.log('');
    } catch (err) {
      console.log(` ❌ Error: ${err.message}`);
      // Recover by going back, but only if we actually left the list page.
      if (page.url() !== urlBeforeClick) {
        try {
          await page.goBack();
          await sleep(2000);
        } catch (e) {
          console.log(' ⚠️ Could not go back, continuing...');
        }
      }
    }
  }
  return results;
}

/**
 * Write the results to ./xiaozhu_urls.json and print them to the console.
 *
 * @param {Array<object>} results - Entries with `title`, `price` and `url`.
 * @returns {void}
 */
function reportResults(results) {
  const outputFile = './xiaozhu_urls.json';
  fs.writeFileSync(outputFile, JSON.stringify(results, null, 2));
  console.log(`\n💾 Saved ${results.length} URLs to ${outputFile}`);

  console.log('\n📋 EXTRACTED URLS:\n');
  results.forEach((r, i) => {
    console.log(`${i + 1}. ${r.title?.substring(0, 60)}`);
    console.log(` 💰 ${r.price}`);
    console.log(` 🔗 ${r.url}\n`);
  });
}

/**
 * Load the Xiaozhu mobile site, search for CONFIG.searchQuery, click through
 * the resulting listings to discover their URLs, and save them to
 * ./xiaozhu_urls.json. Debug screenshots are written to the working directory
 * along the way.
 *
 * Errors raised while scraping are logged and not rethrown. Errors from
 * launching the browser or preparing the page reject the returned promise.
 * The browser is closed in every case once it has been launched.
 *
 * @returns {Promise<void>}
 */
async function extractURLs() {
  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: { width: 414, height: 896 },
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    // Inside the outer try so a setup failure cannot leak the browser process.
    const page = await openMobilePage(browser);

    try {
      console.log('🌐 Loading Xiaozhu...');
      await page.goto('https://minsu.xiaozhu.com/', { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(3000);
      console.log('📸 Taking homepage screenshot...');
      await page.screenshot({ path: './xiaozhu_url_extract_home.png' });

      await submitSearch(page, CONFIG.searchQuery);
      await page.screenshot({ path: './xiaozhu_url_extract_search.png' });

      // Scroll to the bottom to load more listings.
      console.log('⏬ Scrolling to load listings...');
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(5000);
      await page.screenshot({ path: './xiaozhu_url_extract_scrolled.png' });

      const listings = await locateListings(page);
      if (!listings) {
        console.log('❌ No listings found. Check screenshots for debugging.');
        // The finally block below closes the browser; no explicit close here.
        return;
      }

      const results = await collectListingUrls(page, listings.selector, listings.count);
      reportResults(results);
    } catch (err) {
      console.error('❌ Error:', err.message);
    }
  } finally {
    await browser.close();
  }
}
// Start the run; a rejection from extractURLs is printed with console.error.
extractURLs().catch((err) => console.error(err));