Created automated generator for project tracking .md files.

Features:
- Inspects repositories to extract information
- Detects tech stack from file extensions
- Extracts descriptions from CLAUDE.md/README.md
- Extracts status from documentation
- Gets git history (last commit info)
- Analyzes directory structure
- Generates .md files from template

Usage:
    python tools/generate_project_files.py                  # Generate all missing
    python tools/generate_project_files.py --dry-run        # Preview only
    python tools/generate_project_files.py --repo RepoName  # Single repo

Template system:
- BASE.md template with category-specific labels
- UTF-8/emoji support for Windows
- Extensible for future enhancements

Part of Gap 3 resolution (missing project tracking files).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
454 lines
14 KiB
Python
#!/usr/bin/env python3
"""
Generate missing project tracking .md files by inspecting repositories.

Usage:
    python tools/generate_project_files.py [--dry-run] [--repo REPO_NAME]
"""

import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List

# Force UTF-8 encoding for Windows console (emoji support)
os.environ['PYTHONIOENCODING'] = 'utf-8'
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# Paths
SCRIPT_DIR = Path(__file__).parent
PROJECT_TRACKER = SCRIPT_DIR.parent
CONFIG_FILE = SCRIPT_DIR / "config.json"
TEMPLATE_FILE = SCRIPT_DIR / "templates" / "BASE.md"

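# Note: config.json is a separate file in tools/ and is not shown here. Based on the
# keys read in main() below (config["repos_root"] and config["project_mapping"]) and the
# path format expected by get_category_from_path(), a minimal, purely illustrative
# sketch of its shape would look like this (repo name and paths are placeholders):
#
#   {
#     "repos_root": "C:/path/to/repos",
#     "project_mapping": {
#       "SomeRepo": "projects/WIP/SomeRepo.md"
#     }
#   }
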
# Category names
CATEGORY_NAMES = {
    "META": "Meta-Project (Infrastructure/Coordination)",
    "CONSTANT": "Constant (Ongoing/Maintenance)",
    "WIP": "Work In Progress (Active Development)",
    "CONCEPT": "Concept (Idea Stage)",
    "PAUSE": "Paused (Temporarily Inactive)",
    "DONE": "Done (Completed/Archived)"
}

# Language detection
LANGUAGE_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".cpp": "C++",
    ".c": "C",
    ".h": "C/C++ Headers",
    ".hpp": "C++ Headers",
    ".java": "Java",
    ".cs": "C#",
    ".go": "Go",
    ".rs": "Rust",
    ".rb": "Ruby",
    ".php": "PHP",
    ".swift": "Swift",
    ".kt": "Kotlin",
    ".md": "Markdown",
    ".json": "JSON",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".sh": "Shell Script",
    ".bat": "Batch Script",
    ".ps1": "PowerShell",
}


def load_json(path: Path) -> dict:
    """Load JSON file"""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_template() -> str:
    """Load base template"""
    with open(TEMPLATE_FILE, 'r', encoding='utf-8') as f:
        return f.read()


def get_category_from_path(project_file: str) -> str:
    """Extract category from project file path

    Example: projects/WIP/repo.md -> WIP
    """
    parts = project_file.split('/')
    if len(parts) >= 2 and parts[0] == "projects":
        return parts[1]
    return "UNCATEGORIZED"

def read_file_safe(path: Path, max_lines: int = 50) -> str:
    """Read file with error handling"""
    try:
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = []
            for i, line in enumerate(f):
                if i >= max_lines:
                    break
                lines.append(line.rstrip())
            return '\n'.join(lines)
    except Exception:
        return ""


def detect_languages(repo_path: Path) -> List[str]:
    """Detect programming languages from file extensions"""
    languages = set()

    try:
        for root, dirs, files in os.walk(repo_path):
            # Skip common ignore directories
            dirs[:] = [d for d in dirs if d not in {'.git', 'node_modules', '__pycache__', 'build', 'dist', '.venv', 'venv'}]

            for file in files:
                ext = Path(file).suffix.lower()
                if ext in LANGUAGE_EXTENSIONS:
                    languages.add(LANGUAGE_EXTENSIONS[ext])
    except Exception:
        pass

    return sorted(languages)

def extract_description_from_claude_md(content: str) -> str:
    """Extract description from CLAUDE.md"""
    lines = content.split('\n')

    # Look for first heading or first paragraph
    description_lines = []
    in_description = False

    for line in lines:
        stripped = line.strip()

        # Skip title/headers until we find content
        if stripped.startswith('#'):
            if description_lines:  # If we already have content, stop at next header
                break
            continue

        # Skip empty lines at start
        if not stripped and not description_lines:
            continue

        # Collect content
        if stripped:
            description_lines.append(stripped)
            if len(description_lines) >= 3:  # First 3 non-empty lines
                break

    return ' '.join(description_lines[:2]) if description_lines else "No description available."


def extract_status_from_claude_md(content: str) -> str:
    """Extract current status from CLAUDE.md"""
    lines = content.split('\n')

    # Look for status-related sections
    status_keywords = ['statut', 'status', 'current', 'phase', 'état']
    status_lines = []

    for i, line in enumerate(lines):
        lower = line.lower()

        # Check if line contains status keyword
        if any(kw in lower for kw in status_keywords):
            # Collect next few non-empty lines
            for j in range(i, min(i + 5, len(lines))):
                stripped = lines[j].strip()
                if stripped and not stripped.startswith('#'):
                    status_lines.append(stripped)
                    if len(status_lines) >= 2:
                        break
            break

    return ' '.join(status_lines) if status_lines else "Status unknown - inspect repository."

def get_last_commit_info(repo_path: Path) -> Dict[str, str]:
    """Get last commit date and message"""
    try:
        # Check if it's a git repo
        git_dir = repo_path / ".git"
        if not git_dir.exists():
            return {"date": "Unknown", "message": "Not a git repository"}

        # Get last commit date
        result = subprocess.run(
            ["git", "-C", str(repo_path), "log", "-1", "--format=%ar"],
            capture_output=True,
            text=True,
            timeout=5
        )
        date = result.stdout.strip() if result.returncode == 0 else "Unknown"

        # Get last commit message
        result = subprocess.run(
            ["git", "-C", str(repo_path), "log", "-1", "--format=%s"],
            capture_output=True,
            text=True,
            timeout=5
        )
        message = result.stdout.strip() if result.returncode == 0 else "No commits"

        return {"date": date, "message": message}
    except Exception:
        return {"date": "Unknown", "message": "Error reading git history"}


def get_directory_structure(repo_path: Path) -> List[str]:
    """Get high-level directory structure"""
    try:
        dirs = []
        for item in sorted(repo_path.iterdir()):
            if item.is_dir() and not item.name.startswith('.'):
                dirs.append(item.name)
        return dirs[:10]  # Top 10 directories
    except Exception:
        return []

def inspect_repo(repo_name: str, repo_path: Path) -> Dict[str, Any]:
    """Inspect repository and extract information"""

    info = {
        "repo_exists": repo_path.exists(),
        "has_claude_md": False,
        "has_readme": False,
        "has_todo": False,
        "claude_content": "",
        "readme_content": "",
        "todo_content": "",
        "languages": [],
        "last_commit": {},
        "directories": []
    }

    if not repo_path.exists():
        return info

    # Check for CLAUDE.md
    claude_path = repo_path / "CLAUDE.md"
    if claude_path.exists():
        info["has_claude_md"] = True
        info["claude_content"] = read_file_safe(claude_path, max_lines=100)

    # Check for README
    for readme_name in ["README.md", "Readme.md", "readme.md"]:
        readme_path = repo_path / readme_name
        if readme_path.exists():
            info["has_readme"] = True
            info["readme_content"] = read_file_safe(readme_path, max_lines=50)
            break

    # Check for TODO.md
    todo_path = repo_path / "TODO.md"
    if todo_path.exists():
        info["has_todo"] = True
        info["todo_content"] = read_file_safe(todo_path, max_lines=20)

    # Detect languages
    info["languages"] = detect_languages(repo_path)

    # Get last commit
    info["last_commit"] = get_last_commit_info(repo_path)

    # Get directory structure
    info["directories"] = get_directory_structure(repo_path)

    return info

def generate_project_md(repo_name: str, category: str, info: Dict, template: str) -> str:
    """Generate project .md content from template and extracted info"""

    # Extract description
    if info["has_claude_md"]:
        description = extract_description_from_claude_md(info["claude_content"])
    elif info["has_readme"]:
        description = extract_description_from_claude_md(info["readme_content"])  # Same logic works
    else:
        description = "No description available - repository needs documentation."

    # Tech stack
    if info["languages"]:
        tech_stack = ", ".join(info["languages"])
    else:
        tech_stack = "Unknown - inspect repository"

    # Current status
    if info["has_claude_md"]:
        current_status = extract_status_from_claude_md(info["claude_content"])
    else:
        last_commit = info["last_commit"]
        current_status = f"Last commit: {last_commit['message']} ({last_commit['date']})"

    # Key features
    key_features = "- See CLAUDE.md for detailed features" if info["has_claude_md"] else "- Features not documented yet"

    # Structure
    if info["directories"]:
        structure = "```\n" + "\n".join([f"{d}/" for d in info["directories"]]) + "\n```"
    else:
        structure = "Structure not analyzed yet."

    # Next steps
    if info["has_todo"]:
        todo_lines = [line.strip() for line in info["todo_content"].split('\n') if line.strip() and not line.startswith('#')]
        next_steps = "\n".join([f"- {line}" for line in todo_lines[:5]])
    else:
        next_steps = "- See TODO.md or project plans for next steps"

    # Notes
    notes_parts = []
    if not info["repo_exists"]:
        notes_parts.append("⚠️ Repository not found in expected location")
    if not info["has_claude_md"]:
        notes_parts.append("⚠️ No CLAUDE.md found - add project documentation")

    notes = "\n".join(notes_parts) if notes_parts else "None"

    # Project type (infer from languages/structure - prioritize main language)
    project_type = "Unknown"
    lang_list = info["languages"]

    # Prioritize based on typical primary languages
    if "C++" in lang_list or "C" in lang_list:
        project_type = "C++ Application"
    elif "Python" in lang_list:
        project_type = "Python Application"
    elif "JavaScript" in lang_list or "TypeScript" in lang_list:
        project_type = "JavaScript/Node.js Application"
    elif "C#" in lang_list:
        project_type = "C# Application"
    elif "Rust" in lang_list:
        project_type = "Rust Application"
    elif "Go" in lang_list:
        project_type = "Go Application"

    # Fill template
    today = datetime.now().strftime("%Y-%m-%d")
    content = template.format(
        project_name=repo_name,
        category=category,
        category_full=CATEGORY_NAMES.get(category, category),
        project_type=project_type,
        repo_name=repo_name,
        description=description,
        tech_stack=tech_stack,
        current_status=current_status,
        key_features=key_features,
        structure=structure,
        next_steps=next_steps,
        notes=notes,
        created_date=today,
        updated_date=today
    )

    return content

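# Note: templates/BASE.md is a separate file and is not shown here. For reference,
# generate_project_md() above fills exactly these placeholders, so any compatible
# template must define all of them. The layout below is a purely illustrative
# skeleton (not the real template), kept only to document the required fields:
#
#   # {project_name}
#   **Category:** {category} - {category_full}
#   **Type:** {project_type} | **Repository:** {repo_name}
#
#   ## Description
#   {description}
#
#   ## Tech Stack
#   {tech_stack}
#
#   ## Current Status
#   {current_status}
#
#   ## Key Features
#   {key_features}
#
#   ## Structure
#   {structure}
#
#   ## Next Steps
#   {next_steps}
#
#   ## Notes
#   {notes}
#
#   Created: {created_date} | Updated: {updated_date}
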
def main():
    """Main function"""

    # Parse arguments
    dry_run = "--dry-run" in sys.argv
    specific_repo = None
    if "--repo" in sys.argv:
        idx = sys.argv.index("--repo")
        if idx + 1 < len(sys.argv):
            specific_repo = sys.argv[idx + 1]

    # Load config
    config = load_json(CONFIG_FILE)
    repos_root = Path(config["repos_root"])
    project_mapping = config["project_mapping"]

    # Load template
    template = load_template()

    # Stats
    stats = {
        "total": 0,
        "generated": 0,
        "skipped": 0,
        "errors": 0
    }

    print("🔍 ProjectTracker - Project File Generator\n")

    # Process each repository
    for repo_name, project_file in project_mapping.items():
        # Filter if specific repo requested
        if specific_repo and repo_name != specific_repo:
            continue

        stats["total"] += 1

        # Check if file already exists
        project_path = PROJECT_TRACKER / project_file
        if project_path.exists():
            print(f"⏭️ {repo_name} - Already exists, skipping")
            stats["skipped"] += 1
            continue

        # Inspect repository
        repo_path = repos_root / repo_name
        print(f"📂 {repo_name} - Inspecting...")

        try:
            info = inspect_repo(repo_name, repo_path)
            category = get_category_from_path(project_file)
            content = generate_project_md(repo_name, category, info, template)

            if dry_run:
                print(f"  [DRY RUN] Would generate: {project_file}")
                print(f"  Languages: {', '.join(info['languages']) if info['languages'] else 'None'}")
                print(f"  Has CLAUDE.md: {info['has_claude_md']}")
            else:
                # Ensure directory exists
                project_path.parent.mkdir(parents=True, exist_ok=True)

                # Write file
                with open(project_path, 'w', encoding='utf-8') as f:
                    f.write(content)

                print(f"✅ {repo_name} - Generated: {project_file}")
                stats["generated"] += 1

        except Exception as e:
            print(f"❌ {repo_name} - Error: {e}")
            stats["errors"] += 1

    # Summary
    print(f"\n{'='*60}")
    print("📊 Summary:")
    print(f"  Total repos: {stats['total']}")
    print(f"  Generated: {stats['generated']}")
    print(f"  Skipped (already exist): {stats['skipped']}")
    print(f"  Errors: {stats['errors']}")

    if dry_run:
        print("\n💡 This was a dry run. Remove --dry-run to generate files.")

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ Interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Fatal error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)