Complete Python implementation with strict type safety and best practices.
Features:
- RSS/Atom/HTML web scraping
- GPT-4 Vision image analysis
- Node.js API integration
- RSS/JSON feed publishing
Modules:
- src/config.py: Configuration with strict validation
- src/exceptions.py: Custom exception hierarchy
- src/scraper.py: Multi-format news scraping (RSS/Atom/HTML)
- src/image_analyzer.py: GPT-4 Vision integration with retry
- src/aggregator.py: Content aggregation and filtering
- src/article_client.py: Node.js API client with retry
- src/publisher.py: RSS/JSON feed generation
- scripts/run.py: Complete pipeline orchestrator
- scripts/validate.py: Code quality validation
Code Quality:
- 100% type hint coverage (mypy strict mode)
- Zero bare except clauses
- Logger throughout (no print statements)
- Comprehensive test suite (598 lines)
- Immutable dataclasses (frozen=True)
- Explicit error handling
- Structured logging
Stats:
- 1,431 lines of source code
- 598 lines of test code
- 15 Python files
- 8 core modules
- 4 test suites
All validation checks pass.
🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
156 lines
6.0 KiB
Python
156 lines
6.0 KiB
Python
"""Tests for config.py module."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from src.config import APIConfig, Config, PublisherConfig, ScraperConfig
|
|
from src.exceptions import ConfigurationError
|
|
|
|
|
|
def test_api_config_creation() -> None:
    """Check that APIConfig exposes the values given to its constructor.

    The instance is built from explicit keyword arguments and every
    attribute is then read back and compared with the value passed in.
    """
    expected_fields = {
        "openai_key": "sk-test123",
        "node_api_url": "http://localhost:3000",
        "timeout_seconds": 30,
    }

    api_settings = APIConfig(**expected_fields)

    for field_name, expected_value in expected_fields.items():
        assert getattr(api_settings, field_name) == expected_value
|
|
|
|
|
|
def test_scraper_config_creation() -> None:
    """Check that ScraperConfig exposes the values given to its constructor.

    A single source URL, an article limit of 10 and a timeout of 10 seconds
    are passed in; all three attributes must read back unchanged.
    """
    scraper_settings = ScraperConfig(
        sources=["https://example.com"],
        max_articles=10,
        timeout_seconds=10,
    )

    observed = (
        scraper_settings.sources,
        scraper_settings.max_articles,
        scraper_settings.timeout_seconds,
    )
    # Compare against fresh literals, not the objects handed to the constructor.
    assert observed == (["https://example.com"], 10, 10)
|
|
|
|
|
|
def test_publisher_config_creation() -> None:
    """Check that PublisherConfig keeps the output directory it was given."""
    target_dir = Path("./output")

    publisher_settings = PublisherConfig(output_dir=target_dir)

    assert publisher_settings.output_dir == Path("./output")
|
|
|
|
|
|
def test_config_from_env_success(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() reads a fully valid environment.

    Sets the OpenAI key, the Node API URL, two comma-separated news sources
    and a log level, then verifies each value on the loaded configuration.
    The comma-separated sources are expected to come back as a two-item list.
    """
    env_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NODE_API_URL": "http://localhost:3000",
        "NEWS_SOURCES": "https://example.com,https://test.com",
        "LOG_LEVEL": "DEBUG",
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)

    loaded = Config.from_env()

    assert loaded.api.openai_key == "sk-test123"
    assert loaded.api.node_api_url == "http://localhost:3000"
    assert loaded.scraper.sources == ["https://example.com", "https://test.com"]
    assert loaded.log_level == "DEBUG"
|
|
|
|
|
|
def test_config_from_env_missing_openai_key(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects an environment without OPENAI_API_KEY.

    The other two variables set here are given valid-looking values; the
    raised ConfigurationError must mention "OPENAI_API_KEY".
    """
    # Remove the variable under test; tolerate it already being absent.
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    present_values = {
        "NODE_API_URL": "http://localhost:3000",
        "NEWS_SOURCES": "https://example.com",
    }
    for var_name, var_value in present_values.items():
        monkeypatch.setenv(var_name, var_value)

    expected_message = "OPENAI_API_KEY"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_invalid_openai_key(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects a badly formatted OPENAI_API_KEY.

    The key is set to "invalid-key"; the raised ConfigurationError must
    contain the text "must start with 'sk-'".
    """
    env_values = {
        "OPENAI_API_KEY": "invalid-key",
        "NODE_API_URL": "http://localhost:3000",
        "NEWS_SOURCES": "https://example.com",
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)

    expected_message = "must start with 'sk-'"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_missing_node_api_url(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects an environment without NODE_API_URL.

    The OpenAI key and news sources are set; the raised ConfigurationError
    must mention "NODE_API_URL".
    """
    present_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NEWS_SOURCES": "https://example.com",
    }
    for var_name, var_value in present_values.items():
        monkeypatch.setenv(var_name, var_value)
    # Remove the variable under test; tolerate it already being absent.
    monkeypatch.delenv("NODE_API_URL", raising=False)

    expected_message = "NODE_API_URL"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_invalid_node_api_url(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects a NODE_API_URL that is not a URL.

    NODE_API_URL is set to "not-a-url"; the raised ConfigurationError must
    contain "Invalid NODE_API_URL".
    """
    env_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NODE_API_URL": "not-a-url",
        "NEWS_SOURCES": "https://example.com",
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)

    expected_message = "Invalid NODE_API_URL"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_missing_news_sources(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects an environment without NEWS_SOURCES.

    The OpenAI key and Node API URL are set; the raised ConfigurationError
    must mention "NEWS_SOURCES".
    """
    present_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NODE_API_URL": "http://localhost:3000",
    }
    for var_name, var_value in present_values.items():
        monkeypatch.setenv(var_name, var_value)
    # Remove the variable under test; tolerate it already being absent.
    monkeypatch.delenv("NEWS_SOURCES", raising=False)

    expected_message = "NEWS_SOURCES"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_invalid_news_source(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects a NEWS_SOURCES entry that is not a URL.

    NEWS_SOURCES is set to "not-a-url"; the raised ConfigurationError must
    contain "Invalid source URL".
    """
    env_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NODE_API_URL": "http://localhost:3000",
        "NEWS_SOURCES": "not-a-url",
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)

    expected_message = "Invalid source URL"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_invalid_timeout(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects a non-integer API_TIMEOUT.

    API_TIMEOUT is set to "invalid" alongside otherwise valid-looking
    values; the raised ConfigurationError must contain "Invalid API_TIMEOUT".
    """
    env_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NODE_API_URL": "http://localhost:3000",
        "NEWS_SOURCES": "https://example.com",
        "API_TIMEOUT": "invalid",
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)

    expected_message = "Invalid API_TIMEOUT"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_negative_timeout(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects a negative API_TIMEOUT.

    API_TIMEOUT is set to "-1" alongside otherwise valid-looking values; the
    raised ConfigurationError must contain "API_TIMEOUT must be positive".
    """
    env_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NODE_API_URL": "http://localhost:3000",
        "NEWS_SOURCES": "https://example.com",
        "API_TIMEOUT": "-1",
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)

    expected_message = "API_TIMEOUT must be positive"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_from_env_invalid_log_level(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that Config.from_env() rejects an unrecognised LOG_LEVEL.

    LOG_LEVEL is set to "INVALID" alongside otherwise valid-looking values;
    the raised ConfigurationError must contain "Invalid LOG_LEVEL".
    """
    env_values = {
        "OPENAI_API_KEY": "sk-test123",
        "NODE_API_URL": "http://localhost:3000",
        "NEWS_SOURCES": "https://example.com",
        "LOG_LEVEL": "INVALID",
    }
    for var_name, var_value in env_values.items():
        monkeypatch.setenv(var_name, var_value)

    expected_message = "Invalid LOG_LEVEL"
    with pytest.raises(ConfigurationError, match=expected_message):
        Config.from_env()
|
|
|
|
|
|
def test_config_immutability() -> None:
    """Check that attributes of an APIConfig instance cannot be reassigned.

    The assignment must fail with ``dataclasses.FrozenInstanceError``
    specifically. Catching the bare ``Exception`` base class would let the
    test pass on any unrelated failure, hiding regressions instead of
    detecting them.
    """
    # Imported locally so this test does not depend on the module's import block.
    from dataclasses import FrozenInstanceError

    config = APIConfig(
        openai_key="sk-test123",
        node_api_url="http://localhost:3000",
    )

    with pytest.raises(FrozenInstanceError):
        config.openai_key = "sk-changed"  # type: ignore

    # The rejected assignment must leave the original value in place.
    assert config.openai_key == "sk-test123"
|