# feedgenerator/tests/test_aggregator.py

"""Tests for aggregator.py module."""

from __future__ import annotations

from datetime import datetime

import pytest

from src.aggregator import AggregatedContent, ContentAggregator
from src.image_analyzer import ImageAnalysis
from src.scraper import NewsArticle


def test_aggregated_content_creation() -> None:
    """Check that AggregatedContent exposes the article and analysis it was built from."""
    site = "https://example.com"
    picture = "https://example.com/img.jpg"

    news_fields = {
        "title": "Test",
        "url": site,
        "content": "Content",
        "image_url": picture,
        "published_at": None,
        "source": site,
    }
    news_item = NewsArticle(**news_fields)

    image_info = ImageAnalysis(
        image_url=picture,
        description="Test description",
        confidence=0.9,
        analysis_time=datetime.now(),
    )

    bundle = AggregatedContent(news=news_item, image_analysis=image_info)

    # Both attributes must compare equal to the objects handed to the constructor.
    assert bundle.news == news_item
    assert bundle.image_analysis == image_info


def test_aggregated_content_to_prompt() -> None:
    """Check the prompt mapping built from an article that has an image analysis."""
    picture = "https://example.com/img.jpg"

    news_item = NewsArticle(
        title="Test Title",
        url="https://example.com",
        content="Test Content",
        image_url=picture,
        published_at=None,
        source="https://example.com",
    )
    image_info = ImageAnalysis(
        image_url=picture,
        description="Image description",
        confidence=0.9,
        analysis_time=datetime.now(),
    )

    prompt = AggregatedContent(
        news=news_item,
        image_analysis=image_info,
    ).to_generation_prompt()

    # Each prompt key is expected to carry the matching source value.
    expected_entries = (
        ("topic", "Test Title"),
        ("context", "Test Content"),
        ("image_description", "Image description"),
    )
    for key, expected_value in expected_entries:
        assert prompt[key] == expected_value


def test_aggregated_content_to_prompt_no_image() -> None:
    """Check the prompt mapping when the content carries no image analysis."""
    news_fields = {
        "title": "Test Title",
        "url": "https://example.com",
        "content": "Test Content",
        "image_url": None,
        "published_at": None,
        "source": "https://example.com",
    }
    bundle = AggregatedContent(news=NewsArticle(**news_fields), image_analysis=None)

    prompt = bundle.to_generation_prompt()

    assert prompt["topic"] == "Test Title"
    assert prompt["context"] == "Test Content"
    # With no analysis attached, the image key must be absent altogether.
    assert "image_description" not in prompt


def test_aggregator_initialization() -> None:
    """Check that the constructor stores the supplied min_confidence."""
    threshold = 0.5
    instance = ContentAggregator(min_confidence=threshold)

    # The test deliberately inspects the private attribute.
    assert instance._min_confidence == threshold


def test_aggregator_invalid_confidence() -> None:
    """Check that a min_confidence of 1.5 is rejected with a ValueError."""
    expected_message = "min_confidence must be between"

    with pytest.raises(ValueError, match=expected_message):
        ContentAggregator(min_confidence=1.5)


def test_aggregator_aggregate_with_matching_analysis() -> None:
    """Check that an article is paired with the analysis keyed by its image URL."""
    picture = "https://example.com/img.jpg"
    combiner = ContentAggregator(min_confidence=0.5)

    news_item = NewsArticle(
        title="Test",
        url="https://example.com",
        content="Content",
        image_url=picture,
        published_at=None,
        source="https://example.com",
    )
    image_info = ImageAnalysis(
        image_url=picture,
        description="Description",
        confidence=0.9,
        analysis_time=datetime.now(),
    )

    # The analyses mapping is keyed by the same URL the article points at.
    analyses_by_url = {picture: image_info}
    results = combiner.aggregate([news_item], analyses_by_url)

    assert len(results) == 1
    first = results[0]
    assert first.news == news_item
    assert first.image_analysis == image_info


def test_aggregator_aggregate_low_confidence() -> None:
    """Check that an analysis below min_confidence is not attached to the article."""
    picture = "https://example.com/img.jpg"
    combiner = ContentAggregator(min_confidence=0.8)

    news_item = NewsArticle(
        title="Test",
        url="https://example.com",
        content="Content",
        image_url=picture,
        published_at=None,
        source="https://example.com",
    )
    # Confidence 0.5 sits below the 0.8 threshold configured above.
    weak_analysis = ImageAnalysis(
        image_url=picture,
        description="Description",
        confidence=0.5,
        analysis_time=datetime.now(),
    )

    results = combiner.aggregate([news_item], {picture: weak_analysis})

    # The article itself is still returned, only without its analysis.
    assert len(results) == 1
    assert results[0].image_analysis is None


def test_aggregator_aggregate_no_image() -> None:
    """Check aggregation of an article that has no image URL."""
    combiner = ContentAggregator()
    news_fields = {
        "title": "Test",
        "url": "https://example.com",
        "content": "Content",
        "image_url": None,
        "published_at": None,
        "source": "https://example.com",
    }
    imageless = NewsArticle(**news_fields)

    # An empty mapping is passed because there are no analyses to offer.
    results = combiner.aggregate([imageless], {})

    assert len(results) == 1
    assert results[0].image_analysis is None


def test_aggregator_aggregate_empty_articles() -> None:
    """Check that aggregating an empty article list raises a ValueError."""
    combiner = ContentAggregator()
    expected_message = "At least one article is required"

    with pytest.raises(ValueError, match=expected_message):
        combiner.aggregate([], {})


def test_aggregator_filter_by_image_required() -> None:
    """Check that filter_by_image_required keeps only the item with an analysis."""
    combiner = ContentAggregator()
    site = "https://example.com"
    picture = "https://example.com/img1.jpg"

    with_image = NewsArticle(
        title="Test1",
        url="https://example.com/1",
        content="Content1",
        image_url=picture,
        published_at=None,
        source=site,
    )
    without_image = NewsArticle(
        title="Test2",
        url="https://example.com/2",
        content="Content2",
        image_url=None,
        published_at=None,
        source=site,
    )
    image_info = ImageAnalysis(
        image_url=picture,
        description="Description",
        confidence=0.9,
        analysis_time=datetime.now(),
    )

    # One candidate carries an analysis, the other carries none.
    candidates = [
        AggregatedContent(news=with_image, image_analysis=image_info),
        AggregatedContent(news=without_image, image_analysis=None),
    ]

    kept = combiner.filter_by_image_required(candidates)

    assert len(kept) == 1
    assert kept[0].image_analysis is not None


def test_aggregator_limit_content_length() -> None:
    """Check that limit_content_length shortens over-long content and appends "..."."""
    combiner = ContentAggregator()
    cap = 100
    ellipsis_suffix = "..."

    lengthy = NewsArticle(
        title="Test",
        url="https://example.com",
        content="A" * 1000,
        image_url=None,
        published_at=None,
        source="https://example.com",
    )
    bundle = AggregatedContent(news=lengthy, image_analysis=None)

    results = combiner.limit_content_length([bundle], max_length=cap)

    assert len(results) == 1
    shortened = results[0].news.content
    # Expected length is the cap plus the three-character suffix (103 in total).
    assert len(shortened) == cap + len(ellipsis_suffix)
    assert shortened.endswith(ellipsis_suffix)