/*
 * Major updates:
 * - December 2025 crisis documentation and separation agreement
 * - Daily check system v2 with multiple card categories
 * - Xiaozhu rental search tools and results
 * - Exit plan documentation
 * - Message drafts for family communication
 * - Confluent moved to CONSTANT
 * - Updated profiles and promises
 *
 * 🤖 Generated with [Claude Code](https://claude.com/claude-code)
 * Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
 *
 * 224 lines, 7.2 KiB, JavaScript
 */
const fs = require('fs');
const { setTimeout: sleep } = require('timers/promises');

const puppeteer = require('puppeteer');

/**
 * Xiaozhu URL Extractor - Click listings to get real URLs
 * Workaround for Vue.js router-link navigation
 */

/**
 * Run parameters for the extractor. Frozen so a run cannot change its own
 * settings part-way through.
 */
const CONFIG = Object.freeze({
  // Text typed into the site's search box.
  searchQuery: '交通大学',
  // Coordinates reported to the page through the emulated geolocation.
  latitude: 31.1880,
  longitude: 121.4367,
  // Upper bound on how many listings are clicked through.
  maxListings: 10
});

// Startup banner, printed when the script is loaded (before the browser launches).
console.log('🔗 Xiaozhu URL Extractor');
console.log('🎯 Extracting real URLs by simulating clicks...\n');

/**
 * Type the query into the first text input and open the best-matching
 * suggestion, falling back to pressing Enter when no suggestion is visible.
 *
 * @param {object} page - Puppeteer page showing the Xiaozhu home page.
 * @param {string} query - Search text to type.
 * @returns {Promise<void>}
 */
async function submitSearch(page, query) {
  const searchInput = await page.$('input[type="text"]');
  if (!searchInput) {
    console.log('⚠️ No search input found, continuing without searching');
    return;
  }

  console.log('⌨️ Found search input, typing...');
  await searchInput.click();
  await sleep(500);

  // Clear any pre-filled text. select() works the same on every host OS,
  // unlike a Ctrl+A chord (which is Cmd+A on macOS).
  await searchInput.evaluate((input) => input.select());
  await page.keyboard.press('Backspace');

  await searchInput.type(query, { delay: 150 });
  console.log(` Typed: ${query}`);
  await sleep(2000);

  await page.screenshot({ path: './xiaozhu_url_extract_typed.png' });

  // Look for a visible suggestion containing the keyword. Every ancestor of a
  // suggestion also "contains" the keyword, so prefer an exact match and
  // otherwise the shortest (most specific) text; on ties the later element in
  // document order wins, i.e. the innermost one. The chosen text is returned
  // because console.log inside evaluate() only reaches the browser console.
  const suggestion = await page.evaluate((keyword) => {
    let best = null;

    for (const el of document.querySelectorAll('div, li, a, span')) {
      const text = el.textContent.trim();
      if (!text.includes(keyword)) continue;

      const rect = el.getBoundingClientRect();
      if (rect.width <= 0 || rect.height <= 0) continue;

      const exact = text === keyword;
      const isBetter =
        best === null ||
        (exact && !best.exact) ||
        (exact === best.exact && text.length <= best.text.length);
      if (isBetter) {
        best = { el, text, exact };
      }
    }

    if (best === null) return null;
    best.el.click();
    return best.text;
  }, query);

  if (suggestion !== null) {
    console.log(`✅ Clicked suggestion: "${suggestion.substring(0, 60)}"\n`);
    await sleep(4000);
  } else {
    console.log('⚠️ No suggestion, pressing Enter\n');
    await page.keyboard.press('Enter');
    await sleep(3000);
  }
}

/**
 * Find the first selector that matches listing cards on the current page.
 *
 * @param {object} page - Puppeteer page showing the search results.
 * @returns {Promise<{selector: (string|null), count: number}>} The matching
 *   selector and how many elements it matched; `selector` is null and
 *   `count` is 0 when nothing matched.
 */
async function locateListings(page) {
  const countMatches = (selector) => page.$$eval(selector, (nodes) => nodes.length);

  const primarySelector = '.list-item';
  const primaryCount = await countMatches(primarySelector);
  console.log(`📊 Found ${primaryCount} listings\n`);
  if (primaryCount > 0) {
    return { selector: primarySelector, count: primaryCount };
  }

  console.log('⚠️ No .list-item found, trying alternative selectors...');
  const altSelectors = [
    '[class*="list-item"]',
    '[class*="house"]',
    '[class*="room"]'
  ];

  for (const selector of altSelectors) {
    const count = await countMatches(selector);
    if (count > 0) {
      console.log(` ✅ Found ${count} with selector: ${selector}\n`);
      return { selector, count };
    }
  }

  return { selector: null, count: 0 };
}

/**
 * Read title, price and first image URL from one listing card.
 *
 * @param {object} listing - Puppeteer ElementHandle of the listing card.
 * @returns {Promise<{title: string, price: string, image: (string|null)}>}
 */
function readListingInfo(listing) {
  return listing.evaluate((item) => {
    const titleEl = item.querySelector('.list-title, [class*="title"]');
    const priceEl = item.querySelector('.list-price, [class*="price"]');
    const imgEl = item.querySelector('img');

    return {
      title: titleEl?.textContent.trim() || 'No title',
      price: priceEl?.textContent.trim() || 'No price',
      image: imgEl?.src || null
    };
  });
}

/**
 * Open Xiaozhu in a headless browser with a phone-sized viewport, search for
 * CONFIG.searchQuery, then click up to CONFIG.maxListings result cards one by
 * one and record the URL the page lands on after each click.
 *
 * Side effects: writes debugging screenshots (./xiaozhu_url_extract_*.png)
 * and the collected results to ./xiaozhu_urls.json, and logs progress to the
 * console. Errors inside the run are logged rather than rethrown; they set a
 * failing process exit code.
 *
 * @returns {Promise<void>}
 */
async function extractURLs() {
  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: { width: 414, height: 896 },
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  // Everything after launch runs inside try/finally so the browser is closed
  // exactly once, even when page setup fails.
  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15');

    const context = browser.defaultBrowserContext();
    await context.overridePermissions('https://minsu.xiaozhu.com', ['geolocation']);
    await page.setGeolocation({ latitude: CONFIG.latitude, longitude: CONFIG.longitude, accuracy: 100 });

    console.log('🌐 Loading Xiaozhu...');
    await page.goto('https://minsu.xiaozhu.com/', { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(3000);

    console.log('📸 Taking homepage screenshot...');
    await page.screenshot({ path: './xiaozhu_url_extract_home.png' });

    // Search
    await submitSearch(page, CONFIG.searchQuery);
    await page.screenshot({ path: './xiaozhu_url_extract_search.png' });

    // Scroll to load all
    console.log('⏬ Scrolling to load listings...');
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(5000);

    await page.screenshot({ path: './xiaozhu_url_extract_scrolled.png' });

    // Get all listing elements
    const { selector, count } = await locateListings(page);
    if (count === 0) {
      console.log('❌ No listings found. Check screenshots for debugging.');
      return;
    }

    const total = Math.min(count, CONFIG.maxListings);
    // Remember where the list lives so a click that did not navigate, or a
    // recovery step, can be told apart from being on a detail page.
    const listUrl = page.url();
    const results = [];

    for (let i = 0; i < total; i++) {
      console.log(`🔍 ${i + 1}/${total}...`);

      try {
        // Re-query elements each time (they become stale after navigation),
        // using the same selector that found the listings in the first place.
        const currentElements = await page.$$(selector);
        if (i >= currentElements.length) {
          console.log(' ⚠️ Element no longer available');
          continue;
        }

        // Get listing info before click
        const listingInfo = await readListingInfo(currentElements[i]);

        console.log(` 📝 ${listingInfo.title.substring(0, 50)}...`);
        console.log(` 💰 ${listingInfo.price}`);

        // Click the listing
        await currentElements[i].click();
        console.log(` 👆 Clicked`);

        // Wait for navigation
        await sleep(4000);

        // Get current URL
        const url = page.url();
        if (url === listUrl) {
          // Nothing was opened in this tab: recording the list URL would be
          // wrong, and going back would leave the results page.
          console.log(' ⚠️ Click did not navigate, skipping\n');
          continue;
        }
        console.log(` 🔗 ${url}`);

        results.push({ ...listingInfo, url });

        // Go back
        await page.goBack({ waitUntil: 'networkidle2', timeout: 10000 });
        console.log(` ⬅️ Back to list`);
        await sleep(2000);

        console.log('');
      } catch (err) {
        console.log(` ❌ Error: ${err.message}`);

        // Try to recover by going back, but only if we actually left the list.
        if (page.url() !== listUrl) {
          try {
            await page.goBack();
            await sleep(2000);
          } catch (backErr) {
            console.log(' ⚠️ Could not go back, continuing...');
          }
        }
      }
    }

    // Save results
    const outputFile = './xiaozhu_urls.json';
    fs.writeFileSync(outputFile, JSON.stringify(results, null, 2));
    console.log(`\n💾 Saved ${results.length} URLs to ${outputFile}`);

    // Print results
    console.log('\n📋 EXTRACTED URLS:\n');
    for (const [index, entry] of results.entries()) {
      console.log(`${index + 1}. ${entry.title?.substring(0, 60)}`);
      console.log(` 💰 ${entry.price}`);
      console.log(` 🔗 ${entry.url}\n`);
    }
  } catch (err) {
    console.error('❌ Error:', err.message);
    // Let shells and schedulers see that the run failed.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}

// Entry point. A rejection here means the run failed before extractURLs'
// own error handling took over (for example the browser failed to launch);
// report it and exit with a failing status.
extractURLs().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});