"""Tests for aggregator.py module.""" from __future__ import annotations from datetime import datetime import pytest from src.aggregator import AggregatedContent, ContentAggregator from src.image_analyzer import ImageAnalysis from src.scraper import NewsArticle def test_aggregated_content_creation() -> None: """Test AggregatedContent creation.""" article = NewsArticle( title="Test", url="https://example.com", content="Content", image_url="https://example.com/img.jpg", published_at=None, source="https://example.com", ) analysis = ImageAnalysis( image_url="https://example.com/img.jpg", description="Test description", confidence=0.9, analysis_time=datetime.now(), ) content = AggregatedContent(news=article, image_analysis=analysis) assert content.news == article assert content.image_analysis == analysis def test_aggregated_content_to_prompt() -> None: """Test conversion to generation prompt.""" article = NewsArticle( title="Test Title", url="https://example.com", content="Test Content", image_url="https://example.com/img.jpg", published_at=None, source="https://example.com", ) analysis = ImageAnalysis( image_url="https://example.com/img.jpg", description="Image description", confidence=0.9, analysis_time=datetime.now(), ) content = AggregatedContent(news=article, image_analysis=analysis) prompt = content.to_generation_prompt() assert prompt["topic"] == "Test Title" assert prompt["context"] == "Test Content" assert prompt["image_description"] == "Image description" def test_aggregated_content_to_prompt_no_image() -> None: """Test conversion to prompt without image.""" article = NewsArticle( title="Test Title", url="https://example.com", content="Test Content", image_url=None, published_at=None, source="https://example.com", ) content = AggregatedContent(news=article, image_analysis=None) prompt = content.to_generation_prompt() assert prompt["topic"] == "Test Title" assert prompt["context"] == "Test Content" assert "image_description" not in prompt def test_aggregator_initialization() -> None: """Test ContentAggregator initialization.""" aggregator = ContentAggregator(min_confidence=0.5) assert aggregator._min_confidence == 0.5 def test_aggregator_invalid_confidence() -> None: """Test ContentAggregator rejects invalid confidence.""" with pytest.raises(ValueError, match="min_confidence must be between"): ContentAggregator(min_confidence=1.5) def test_aggregator_aggregate_with_matching_analysis() -> None: """Test aggregation with matching image analysis.""" aggregator = ContentAggregator(min_confidence=0.5) article = NewsArticle( title="Test", url="https://example.com", content="Content", image_url="https://example.com/img.jpg", published_at=None, source="https://example.com", ) analysis = ImageAnalysis( image_url="https://example.com/img.jpg", description="Description", confidence=0.9, analysis_time=datetime.now(), ) aggregated = aggregator.aggregate([article], {"https://example.com/img.jpg": analysis}) assert len(aggregated) == 1 assert aggregated[0].news == article assert aggregated[0].image_analysis == analysis def test_aggregator_aggregate_low_confidence() -> None: """Test aggregation filters low-confidence analyses.""" aggregator = ContentAggregator(min_confidence=0.8) article = NewsArticle( title="Test", url="https://example.com", content="Content", image_url="https://example.com/img.jpg", published_at=None, source="https://example.com", ) analysis = ImageAnalysis( image_url="https://example.com/img.jpg", description="Description", confidence=0.5, # Below threshold analysis_time=datetime.now(), ) aggregated = aggregator.aggregate([article], {"https://example.com/img.jpg": analysis}) assert len(aggregated) == 1 assert aggregated[0].image_analysis is None # Filtered out def test_aggregator_aggregate_no_image() -> None: """Test aggregation with articles without images.""" aggregator = ContentAggregator() article = NewsArticle( title="Test", url="https://example.com", content="Content", image_url=None, published_at=None, source="https://example.com", ) aggregated = aggregator.aggregate([article], {}) assert len(aggregated) == 1 assert aggregated[0].image_analysis is None def test_aggregator_aggregate_empty_articles() -> None: """Test aggregation fails with empty articles list.""" aggregator = ContentAggregator() with pytest.raises(ValueError, match="At least one article is required"): aggregator.aggregate([], {}) def test_aggregator_filter_by_image_required() -> None: """Test filtering to keep only items with images.""" aggregator = ContentAggregator() article1 = NewsArticle( title="Test1", url="https://example.com/1", content="Content1", image_url="https://example.com/img1.jpg", published_at=None, source="https://example.com", ) article2 = NewsArticle( title="Test2", url="https://example.com/2", content="Content2", image_url=None, published_at=None, source="https://example.com", ) analysis = ImageAnalysis( image_url="https://example.com/img1.jpg", description="Description", confidence=0.9, analysis_time=datetime.now(), ) content1 = AggregatedContent(news=article1, image_analysis=analysis) content2 = AggregatedContent(news=article2, image_analysis=None) filtered = aggregator.filter_by_image_required([content1, content2]) assert len(filtered) == 1 assert filtered[0].image_analysis is not None def test_aggregator_limit_content_length() -> None: """Test content length limiting.""" aggregator = ContentAggregator() long_content = "A" * 1000 article = NewsArticle( title="Test", url="https://example.com", content=long_content, image_url=None, published_at=None, source="https://example.com", ) content = AggregatedContent(news=article, image_analysis=None) truncated = aggregator.limit_content_length([content], max_length=100) assert len(truncated) == 1 assert len(truncated[0].news.content) == 103 # 100 + "..." assert truncated[0].news.content.endswith("...")