#!/usr/bin/env python3
"""
Generate missing project tracking .md files by inspecting repositories.

Usage:
    python tools/generate_project_files.py [--dry-run] [--repo REPO_NAME]
"""
import json
import os
import sys
from pathlib import Path
from datetime import datetime
from typing import Any, Dict, List
import subprocess

# Force UTF-8 encoding for the Windows console (emoji support)
os.environ['PYTHONIOENCODING'] = 'utf-8'
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# Paths
SCRIPT_DIR = Path(__file__).parent
PROJECT_TRACKER = SCRIPT_DIR.parent
CONFIG_FILE = SCRIPT_DIR / "config.json"
TEMPLATE_FILE = SCRIPT_DIR / "templates" / "BASE.md"

# Category names
CATEGORY_NAMES = {
    "META": "Meta-Project (Infrastructure/Coordination)",
    "CONSTANT": "Constant (Ongoing/Maintenance)",
    "WIP": "Work In Progress (Active Development)",
    "CONCEPT": "Concept (Idea Stage)",
    "PAUSE": "Paused (Temporarily Inactive)",
    "DONE": "Done (Completed/Archived)"
}

# Language detection
LANGUAGE_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".cpp": "C++",
    ".c": "C",
    ".h": "C/C++ Headers",
    ".hpp": "C++ Headers",
    ".java": "Java",
    ".cs": "C#",
    ".go": "Go",
    ".rs": "Rust",
    ".rb": "Ruby",
    ".php": "PHP",
    ".swift": "Swift",
    ".kt": "Kotlin",
    ".md": "Markdown",
    ".json": "JSON",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".sh": "Shell Script",
    ".bat": "Batch Script",
    ".ps1": "PowerShell",
}


def load_json(path: Path) -> dict:
    """Load a JSON file."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_template() -> str:
    """Load the base template."""
    with open(TEMPLATE_FILE, 'r', encoding='utf-8') as f:
        return f.read()


def get_category_from_path(project_file: str) -> str:
    """Extract the category from a project file path.

    Example: projects/WIP/repo.md -> WIP
    """
    parts = project_file.split('/')
    if len(parts) >= 2 and parts[0] == "projects":
        return parts[1]
    return "UNCATEGORIZED"


def read_file_safe(path: Path, max_lines: int = 50) -> str:
    """Read a file with error handling, truncated to max_lines."""
    try:
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = []
            for i, line in enumerate(f):
                if i >= max_lines:
                    break
                lines.append(line.rstrip())
            return '\n'.join(lines)
    except Exception:
        return ""


def detect_languages(repo_path: Path) -> List[str]:
    """Detect programming languages from file extensions."""
    languages = set()
    try:
        for root, dirs, files in os.walk(repo_path):
            # Skip common ignore directories
            dirs[:] = [d for d in dirs
                       if d not in {'.git', 'node_modules', '__pycache__', 'build', 'dist', '.venv', 'venv'}]
            for file in files:
                ext = Path(file).suffix.lower()
                if ext in LANGUAGE_EXTENSIONS:
                    languages.add(LANGUAGE_EXTENSIONS[ext])
    except Exception:
        pass
    return sorted(languages)


def extract_description_from_claude_md(content: str) -> str:
    """Extract a short description from CLAUDE.md."""
    lines = content.split('\n')
    # Look for the first paragraph after the title
    description_lines = []
    for line in lines:
        stripped = line.strip()
        # Skip the title/headers until we find content
        if stripped.startswith('#'):
            if description_lines:
                # If we already have content, stop at the next header
                break
            continue
        # Skip empty lines at the start
        if not stripped and not description_lines:
            continue
        # Collect content
        if stripped:
            description_lines.append(stripped)
            if len(description_lines) >= 3:
                # First 3 non-empty lines
                break
    return ' '.join(description_lines[:2]) if description_lines else "No description available."
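
# Illustrative example (hypothetical CLAUDE.md content, not from a real repo):
# given a file that starts with
#     # MyProject
#     A CLI tool for syncing notes across machines.
#     Built with Python and SQLite.
# extract_description_from_claude_md() skips the heading and returns the first
# two non-empty lines joined:
#     "A CLI tool for syncing notes across machines. Built with Python and SQLite."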


def extract_status_from_claude_md(content: str) -> str:
    """Extract the current status from CLAUDE.md."""
    lines = content.split('\n')
    # Look for status-related sections
    status_keywords = ['statut', 'status', 'current', 'phase', 'état']
    status_lines = []
    for i, line in enumerate(lines):
        lower = line.lower()
        # Check if the line contains a status keyword
        if any(kw in lower for kw in status_keywords):
            # Collect the next few non-empty lines
            for j in range(i, min(i + 5, len(lines))):
                stripped = lines[j].strip()
                if stripped and not stripped.startswith('#'):
                    status_lines.append(stripped)
                    if len(status_lines) >= 2:
                        break
            break
    return ' '.join(status_lines) if status_lines else "Status unknown - inspect repository."


def get_last_commit_info(repo_path: Path) -> Dict[str, str]:
    """Get the last commit date and message."""
    try:
        # Check if it's a git repo
        git_dir = repo_path / ".git"
        if not git_dir.exists():
            return {"date": "Unknown", "message": "Not a git repository"}

        # Get the last commit date (relative, e.g. "3 days ago")
        result = subprocess.run(
            ["git", "-C", str(repo_path), "log", "-1", "--format=%ar"],
            capture_output=True, text=True, timeout=5
        )
        date = result.stdout.strip() if result.returncode == 0 else "Unknown"

        # Get the last commit message
        result = subprocess.run(
            ["git", "-C", str(repo_path), "log", "-1", "--format=%s"],
            capture_output=True, text=True, timeout=5
        )
        message = result.stdout.strip() if result.returncode == 0 else "No commits"

        return {"date": date, "message": message}
    except Exception:
        return {"date": "Unknown", "message": "Error reading git history"}


def get_directory_structure(repo_path: Path) -> List[str]:
    """Get the high-level directory structure."""
    try:
        dirs = []
        for item in sorted(repo_path.iterdir()):
            if item.is_dir() and not item.name.startswith('.'):
                dirs.append(item.name)
        return dirs[:10]  # Top 10 directories
    except Exception:
        return []


def inspect_repo(repo_name: str, repo_path: Path) -> Dict[str, Any]:
    """Inspect a repository and extract information."""
    info = {
        "repo_exists": repo_path.exists(),
        "has_claude_md": False,
        "has_readme": False,
        "has_todo": False,
        "claude_content": "",
        "readme_content": "",
        "todo_content": "",
        "languages": [],
        "last_commit": {},
        "directories": []
    }

    if not repo_path.exists():
        return info

    # Check for CLAUDE.md
    claude_path = repo_path / "CLAUDE.md"
    if claude_path.exists():
        info["has_claude_md"] = True
        info["claude_content"] = read_file_safe(claude_path, max_lines=100)

    # Check for a README
    for readme_name in ["README.md", "Readme.md", "readme.md"]:
        readme_path = repo_path / readme_name
        if readme_path.exists():
            info["has_readme"] = True
            info["readme_content"] = read_file_safe(readme_path, max_lines=50)
            break

    # Check for TODO.md
    todo_path = repo_path / "TODO.md"
    if todo_path.exists():
        info["has_todo"] = True
        info["todo_content"] = read_file_safe(todo_path, max_lines=20)

    # Detect languages
    info["languages"] = detect_languages(repo_path)

    # Get the last commit
    info["last_commit"] = get_last_commit_info(repo_path)

    # Get the directory structure
    info["directories"] = get_directory_structure(repo_path)

    return info
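
# Illustrative example of what inspect_repo() returns for a hypothetical repo
# (the values below are made up; the keys match the dict built above):
#     {
#         "repo_exists": True,
#         "has_claude_md": True, "has_readme": True, "has_todo": False,
#         "claude_content": "...", "readme_content": "...", "todo_content": "",
#         "languages": ["Markdown", "Python"],
#         "last_commit": {"date": "3 days ago", "message": "Fix config loading"},
#         "directories": ["docs", "src", "tests"]
#     }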


def generate_project_md(repo_name: str, category: str, info: Dict, template: str) -> str:
    """Generate project .md content from the template and extracted info."""
    # Description
    if info["has_claude_md"]:
        description = extract_description_from_claude_md(info["claude_content"])
    elif info["has_readme"]:
        # The same extraction logic works for READMEs
        description = extract_description_from_claude_md(info["readme_content"])
    else:
        description = "No description available - repository needs documentation."

    # Tech stack
    if info["languages"]:
        tech_stack = ", ".join(info["languages"])
    else:
        tech_stack = "Unknown - inspect repository"

    # Current status
    if info["has_claude_md"]:
        current_status = extract_status_from_claude_md(info["claude_content"])
    else:
        last_commit = info["last_commit"]
        current_status = f"Last commit: {last_commit['message']} ({last_commit['date']})"

    # Key features
    key_features = "- See CLAUDE.md for detailed features" if info["has_claude_md"] else "- Features not documented yet"

    # Structure
    if info["directories"]:
        structure = "```\n" + "\n".join([f"{d}/" for d in info["directories"]]) + "\n```"
    else:
        structure = "Structure not analyzed yet."

    # Next steps
    if info["has_todo"]:
        todo_lines = [line.strip() for line in info["todo_content"].split('\n')
                      if line.strip() and not line.startswith('#')]
        next_steps = "\n".join([f"- {line}" for line in todo_lines[:5]])
    else:
        next_steps = "- See TODO.md or project plans for next steps"

    # Notes
    notes_parts = []
    if not info["repo_exists"]:
        notes_parts.append("⚠️ Repository not found in expected location")
    if not info["has_claude_md"]:
        notes_parts.append("⚠️ No CLAUDE.md found - add project documentation")
    notes = "\n".join(notes_parts) if notes_parts else "None"

    # Project type (inferred from languages/structure - prioritize the likely primary language)
    project_type = "Unknown"
    lang_list = info["languages"]
    if "C++" in lang_list or "C" in lang_list:
        project_type = "C++ Application"
    elif "Python" in lang_list:
        project_type = "Python Application"
    elif "JavaScript" in lang_list or "TypeScript" in lang_list:
        project_type = "JavaScript/Node.js Application"
    elif "C#" in lang_list:
        project_type = "C# Application"
    elif "Rust" in lang_list:
        project_type = "Rust Application"
    elif "Go" in lang_list:
        project_type = "Go Application"

    # Fill the template
    today = datetime.now().strftime("%Y-%m-%d")
    content = template.format(
        project_name=repo_name,
        category=category,
        category_full=CATEGORY_NAMES.get(category, category),
        project_type=project_type,
        repo_name=repo_name,
        description=description,
        tech_stack=tech_stack,
        current_status=current_status,
        key_features=key_features,
        structure=structure,
        next_steps=next_steps,
        notes=notes,
        created_date=today,
        updated_date=today
    )
    return content


def main():
    """Main entry point."""
    # Parse arguments
    dry_run = "--dry-run" in sys.argv
    specific_repo = None
    if "--repo" in sys.argv:
        idx = sys.argv.index("--repo")
        if idx + 1 < len(sys.argv):
            specific_repo = sys.argv[idx + 1]

    # Load the config
    config = load_json(CONFIG_FILE)
    repos_root = Path(config["repos_root"])
    project_mapping = config["project_mapping"]

    # Load the template
    template = load_template()

    # Stats
    stats = {"total": 0, "generated": 0, "skipped": 0, "errors": 0}

    print("🔍 ProjectTracker - Project File Generator\n")

    # Process each repository
    for repo_name, project_file in project_mapping.items():
        # Filter if a specific repo was requested
        if specific_repo and repo_name != specific_repo:
            continue

        stats["total"] += 1

        # Skip if the file already exists
        project_path = PROJECT_TRACKER / project_file
        if project_path.exists():
            print(f"⏭️ {repo_name} - Already exists, skipping")
            stats["skipped"] += 1
            continue

        # Inspect the repository
        repo_path = repos_root / repo_name
        print(f"📂 {repo_name} - Inspecting...")

        try:
            info = inspect_repo(repo_name, repo_path)
            category = get_category_from_path(project_file)
            content = generate_project_md(repo_name, category, info, template)

            if dry_run:
                print(f"   [DRY RUN] Would generate: {project_file}")
                print(f"   Languages: {', '.join(info['languages']) if info['languages'] else 'None'}")
                print(f"   Has CLAUDE.md: {info['has_claude_md']}")
            else:
                # Ensure the directory exists
                project_path.parent.mkdir(parents=True, exist_ok=True)

                # Write the file
                with open(project_path, 'w', encoding='utf-8') as f:
                    f.write(content)

                print(f"✅ {repo_name} - Generated: {project_file}")

            stats["generated"] += 1
        except Exception as e:
            print(f"❌ {repo_name} - Error: {e}")
            stats["errors"] += 1

    # Summary
    print(f"\n{'=' * 60}")
    print("📊 Summary:")
    print(f"   Total repos: {stats['total']}")
    print(f"   Generated: {stats['generated']}")
    print(f"   Skipped (already exist): {stats['skipped']}")
    print(f"   Errors: {stats['errors']}")

    if dry_run:
        print("\n💡 This was a dry run. Remove --dry-run to generate files.")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ Interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Fatal error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
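
# --- Expected inputs (illustrative; the repo names and paths below are hypothetical) ---
#
# tools/config.json must provide "repos_root" and "project_mapping", e.g.:
#     {
#         "repos_root": "C:/Users/me/repos",
#         "project_mapping": {
#             "my-cli-tool": "projects/WIP/my-cli-tool.md",
#             "legacy-site": "projects/DONE/legacy-site.md"
#         }
#     }
#
# tools/templates/BASE.md must contain the placeholders consumed by
# generate_project_md() via str.format():
#     {project_name}, {category}, {category_full}, {project_type}, {repo_name},
#     {description}, {tech_stack}, {current_status}, {key_features}, {structure},
#     {next_steps}, {notes}, {created_date}, {updated_date}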