# feedgenerator/scripts/validate.py

"""
Validation script to check project structure and code quality.

Run with: python scripts/validate.py
"""

from __future__ import annotations

import ast
import sys
from pathlib import Path
from typing import List

# Put the project root (the parent of this script's directory) at the front of
# sys.path so that imports resolve against the project root first when this
# script is executed directly.
sys.path.insert(0, str(Path(__file__).parent.parent))


def check_file_exists(path: Path, description: str) -> bool:
    """Report on stdout whether ``path`` exists.

    Args:
        path: Filesystem location to probe.
        description: Label shown in the printed status line.

    Returns:
        True when the path exists, False otherwise.
    """
    found = path.exists()
    if found:
        status_line = f"✓ {description}: {path}"
    else:
        status_line = f"✗ {description} MISSING: {path}"
    print(status_line)
    return found


def check_type_hints(file_path: Path) -> tuple[bool, List[str]]:
    """Check that every non-private function in a file is fully annotated.

    Both ``def`` and ``async def`` functions are inspected. A function is
    skipped when its name starts with a single underscore; dunder methods
    (``__init__`` etc.) are still checked. For each inspected function the
    return annotation and every parameter annotation (positional-only,
    regular, ``*args``, keyword-only and ``**kwargs``) must be present.

    Args:
        file_path: Python source file to parse.

    Returns:
        A ``(passed, issues)`` pair. ``passed`` is True only when no issue
        was found. If the file cannot be read or parsed, ``passed`` is False
        and ``issues`` holds a single "Error parsing ..." message.
    """
    # Methods that receive the class implicitly without an @classmethod
    # decorator; their leading ``cls`` needs no annotation.
    implicit_cls_methods = {"__new__", "__init_subclass__", "__class_getitem__"}

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            tree = ast.parse(f.read(), filename=str(file_path))
    except Exception as e:
        return False, [f"Error parsing {file_path}: {e}"]

    issues: List[str] = []

    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        # Skip private functions (single leading underscore), keep dunders.
        if node.name.startswith("_") and not node.name.startswith("__"):
            continue

        receives_cls = node.name in implicit_cls_methods or any(
            isinstance(dec, ast.Name) and dec.id == "classmethod"
            for dec in node.decorator_list
        )

        # Check return type annotation
        if node.returns is None:
            issues.append(
                f"Function '{node.name}' at line {node.lineno} missing return type"
            )

        # Collect (display name, arg node) for every kind of parameter, in
        # signature order: positional, *args, keyword-only, **kwargs.
        params = []
        for arg in [*node.args.posonlyargs, *node.args.args]:
            # 'self' and an implicit 'cls' never need an annotation.
            if arg.arg == "self" or (arg.arg == "cls" and receives_cls):
                continue
            params.append((arg.arg, arg))
        if node.args.vararg is not None:
            params.append((f"*{node.args.vararg.arg}", node.args.vararg))
        params.extend((arg.arg, arg) for arg in node.args.kwonlyargs)
        if node.args.kwarg is not None:
            params.append((f"**{node.args.kwarg.arg}", node.args.kwarg))

        for display_name, arg in params:
            if arg.annotation is None:
                issues.append(
                    f"Function '{node.name}' at line {node.lineno}: "
                    f"parameter '{display_name}' missing type hint"
                )

    return not issues, issues


def check_no_bare_except(file_path: Path) -> tuple[bool, List[str]]:
    """Check a Python file for bare ``except:`` clauses.

    The file is parsed with ``ast`` and every exception handler that names no
    exception type is reported. Working on the syntax tree (rather than on raw
    text lines) avoids flagging the text ``except:`` inside strings or
    docstrings and also catches spellings such as ``except :``.

    Args:
        file_path: Python source file to inspect.

    Returns:
        A ``(passed, issues)`` pair. ``passed`` is True only when no bare
        except was found. If the file cannot be read or parsed, ``passed`` is
        False and ``issues`` holds a single "Error reading ..." message.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            tree = ast.parse(f.read(), filename=str(file_path))
    except Exception as e:
        return False, [f"Error reading {file_path}: {e}"]

    # ast.walk is breadth-first, so sort to report in source order.
    bare_lines = sorted(
        node.lineno
        for node in ast.walk(tree)
        if isinstance(node, ast.ExceptHandler) and node.type is None
    )
    issues: List[str] = [f"Bare except at line {lineno}" for lineno in bare_lines]

    return not issues, issues


def check_no_print_statements(file_path: Path) -> tuple[bool, List[str]]:
    """Report calls to the bare name ``print`` found in a Python file.

    Args:
        file_path: Python source file to parse.

    Returns:
        A ``(passed, issues)`` pair. ``passed`` is True when no ``print(...)``
        call was found. Any exception while reading or parsing yields
        ``(False, ["Error parsing ..."])``.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            source = handle.read()
        module = ast.parse(source, filename=str(file_path))

        # Only direct calls through the name `print` count; attribute calls
        # such as `obj.print()` are not matched.
        issues: List[str] = [
            f"print() statement at line {candidate.lineno}"
            for candidate in ast.walk(module)
            if isinstance(candidate, ast.Call)
            and isinstance(candidate.func, ast.Name)
            and candidate.func.id == "print"
        ]
        return not issues, issues

    except Exception as e:
        return False, [f"Error parsing {file_path}: {e}"]


def validate_project() -> bool:
    """Validate the project layout and basic code-quality rules.

    The project root is taken to be the parent of this script's directory.
    Four groups of checks run in order, each printing its results to stdout:

    1. required top-level project files exist,
    2. required modules exist under ``src/``,
    3. required test modules exist under ``tests/``,
    4. every ``*.py`` file directly inside ``src/`` and ``scripts/`` (except
       ``__init__.py``) passes the type-hint and bare-except checks; files in
       ``src/`` must additionally contain no ``print()`` calls.

    Returns:
        True when every check passed, False otherwise.
    """
    print("=" * 60)
    print("Feed Generator Project Validation")
    print("=" * 60)
    print()

    all_passed = True

    # Check structure
    print("1. Checking project structure...")
    print("-" * 60)
    root = Path(__file__).parent.parent

    structure_files = [
        ".env.example",
        ".gitignore",
        "requirements.txt",
        "mypy.ini",
        "README.md",
        "ARCHITECTURE.md",
        "CLAUDE.md",
        "SETUP.md",
    ]

    for filename in structure_files:
        if not check_file_exists(root / filename, filename):
            all_passed = False

    print()

    # Check source files
    print("2. Checking source files...")
    print("-" * 60)
    src_dir = root / "src"
    source_files = [
        "__init__.py",
        "exceptions.py",
        "config.py",
        "scraper.py",
        "image_analyzer.py",
        "aggregator.py",
        "article_client.py",
        "publisher.py",
    ]

    for filename in source_files:
        if not check_file_exists(src_dir / filename, f"src/{filename}"):
            all_passed = False

    print()

    # Check test files
    print("3. Checking test files...")
    print("-" * 60)
    tests_dir = root / "tests"
    test_files = [
        "__init__.py",
        "test_config.py",
        "test_scraper.py",
        "test_aggregator.py",
    ]

    for filename in test_files:
        if not check_file_exists(tests_dir / filename, f"tests/{filename}"):
            all_passed = False

    print()

    # Check code quality
    print("4. Checking code quality (type hints, no bare except, no print)...")
    print("-" * 60)

    # glob() order depends on the filesystem; sort each group so the report
    # is stable between runs (src/ files first, then scripts/).
    python_files = sorted(src_dir.glob("*.py"))
    python_files.extend(sorted((root / "scripts").glob("*.py")))

    for py_file in python_files:
        if py_file.name == "__init__.py":
            continue

        print(f"\nChecking {py_file.relative_to(root)}...")

        # Check type hints
        has_types, type_issues = check_type_hints(py_file)
        if not has_types:
            print("  ✗ Type hint issues:")
            for issue in type_issues[:5]:  # Show first 5
                print(f"    - {issue}")
            if len(type_issues) > 5:
                print(f"    ... and {len(type_issues) - 5} more")
            all_passed = False
        else:
            print("  ✓ All functions have type hints")

        # Check bare except
        no_bare, bare_issues = check_no_bare_except(py_file)
        if not no_bare:
            print("  ✗ Bare except issues:")
            for issue in bare_issues:
                print(f"    - {issue}")
            all_passed = False
        else:
            print("  ✓ No bare except clauses")

        # Check print statements (only in src/, not scripts/).
        # Compare directories rather than searching the path string for
        # "src": a substring test also matches checkouts that merely live
        # under a directory such as ~/src/, which would wrongly subject the
        # scripts (including this one) to the print check.
        if py_file.parent == src_dir:
            no_print, print_issues = check_no_print_statements(py_file)
            if not no_print:
                print("  ✗ Print statement issues:")
                for issue in print_issues:
                    print(f"    - {issue}")
                all_passed = False
            else:
                print("  ✓ No print statements (using logger)")

    print()
    print("=" * 60)
    if not all_passed:
        print("❌ SOME VALIDATION CHECKS FAILED")
        print("=" * 60)
        print("Please fix the issues above before proceeding.")
        return False

    print("✅ ALL VALIDATION CHECKS PASSED!")
    print("=" * 60)
    print()
    print("Next steps:")
    print("1. Create .env file: cp .env.example .env")
    print("2. Edit .env with your API keys")
    print("3. Install dependencies: pip install -r requirements.txt")
    print("4. Run type checking: mypy src/")
    print("5. Run tests: pytest tests/")
    print("6. Run pipeline: python scripts/run.py")
    return True


if __name__ == "__main__":
    # Exit status mirrors the validation outcome: 0 on success, 1 on failure.
    exit_code = 0 if validate_project() else 1
    sys.exit(exit_code)