""" Main pipeline orchestrator for Feed Generator. Run with: python scripts/run.py """ from __future__ import annotations import logging import sys from pathlib import Path # Add project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) from src.aggregator import ContentAggregator from src.article_client import ArticleAPIClient from src.config import Config from src.exceptions import ( APIClientError, ConfigurationError, ImageAnalysisError, PublishingError, ScrapingError, ) from src.image_analyzer import ImageAnalyzer from src.publisher import FeedPublisher from src.scraper import NewsScraper logger = logging.getLogger(__name__) def setup_logging(log_level: str) -> None: """Setup logging configuration. Args: log_level: Logging level (DEBUG, INFO, WARNING, ERROR) """ logging.basicConfig( level=getattr(logging, log_level.upper()), format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", handlers=[ logging.StreamHandler(sys.stdout), logging.FileHandler("feed_generator.log"), ], ) def run_pipeline(config: Config) -> None: """Execute complete feed generation pipeline. Args: config: Configuration object Raises: Various exceptions if pipeline fails """ logger.info("=" * 60) logger.info("Starting Feed Generator Pipeline") logger.info("=" * 60) # 1. Initialize components logger.info("Initializing components...") scraper = NewsScraper(config.scraper) analyzer = ImageAnalyzer(config.api.openai_key) aggregator = ContentAggregator() client = ArticleAPIClient(config.api.node_api_url, config.api.timeout_seconds) publisher = FeedPublisher(config.publisher.output_dir) logger.info("Components initialized successfully") # 2. Scrape news sources logger.info("=" * 60) logger.info("Stage 1: Scraping news sources") logger.info("=" * 60) try: articles = scraper.scrape_all() logger.info(f"✓ Scraped {len(articles)} articles") if not articles: logger.error("No articles scraped, exiting") return except ScrapingError as e: logger.error(f"✗ Scraping failed: {e}") return # 3. Analyze images logger.info("=" * 60) logger.info("Stage 2: Analyzing images") logger.info("=" * 60) try: analyses = analyzer.analyze_batch(articles) logger.info(f"✓ Analyzed {len(analyses)} images") except ImageAnalysisError as e: logger.warning(f"⚠ Image analysis failed: {e}, proceeding without images") analyses = {} # 4. Aggregate content logger.info("=" * 60) logger.info("Stage 3: Aggregating content") logger.info("=" * 60) aggregated = aggregator.aggregate(articles, analyses) logger.info(f"✓ Aggregated {len(aggregated)} items") # 5. Generate articles logger.info("=" * 60) logger.info("Stage 4: Generating articles") logger.info("=" * 60) try: prompts = [item.to_generation_prompt() for item in aggregated] original_news_list = [item.news for item in aggregated] generated = client.generate_batch(prompts, original_news_list) logger.info(f"✓ Generated {len(generated)} articles") if not generated: logger.error("No articles generated, exiting") return except APIClientError as e: logger.error(f"✗ Article generation failed: {e}") return # 6. 
    # 6. Publish
    logger.info("=" * 60)
    logger.info("Stage 5: Publishing")
    logger.info("=" * 60)
    try:
        rss_path, json_path = publisher.publish_all(generated)
        logger.info(f"✓ Published RSS to: {rss_path}")
        logger.info(f"✓ Published JSON to: {json_path}")
    except PublishingError as e:
        logger.error(f"✗ Publishing failed: {e}")
        # Try to save to a backup location
        try:
            backup_dir = Path("backup")
            backup_publisher = FeedPublisher(backup_dir)
            backup_json = backup_publisher.publish_json(generated)
            logger.warning(f"⚠ Saved backup to: {backup_json}")
        except Exception as backup_error:
            logger.error(f"✗ Backup also failed: {backup_error}")
        # Publishing failed either way; do not report success below
        return

    # Success!
    logger.info("=" * 60)
    logger.info("Pipeline completed successfully!")
    logger.info(f"Total articles processed: {len(generated)}")
    logger.info("=" * 60)


def main() -> None:
    """Main entry point."""
    try:
        # Load configuration
        config = Config.from_env()

        # Setup logging
        setup_logging(config.log_level)

        # Run pipeline
        run_pipeline(config)
    except ConfigurationError as e:
        # Logging may not be configured yet, so write directly to stderr
        print(f"Configuration error: {e}", file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        logger.info("Pipeline interrupted by user")
        sys.exit(130)
    except Exception as e:
        logger.exception(f"Unexpected error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()