ProjectTracker/tools/generate_project_files.py
StillHammer 8dc67ac03c Add project file generator tool
Created automated generator for project tracking .md files.

Features:
- Inspects repositories to extract information
- Detects tech stack from file extensions
- Extracts descriptions from CLAUDE.md/README.md
- Extracts status from documentation
- Gets git history (last commit info)
- Analyzes directory structure
- Generates .md files from template

Usage:
  python tools/generate_project_files.py          # Generate all missing
  python tools/generate_project_files.py --dry-run  # Preview only
  python tools/generate_project_files.py --repo RepoName  # Single repo

Template system:
- BASE.md template with category-specific labels
- UTF-8/emoji support for Windows
- Extensible for future enhancements

Part of Gap 3 resolution (missing project tracking files).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-19 01:42:57 +07:00

454 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Generate missing project tracking .md files by inspecting repositories.
Usage:
python tools/generate_project_files.py [--dry-run] [--repo REPO_NAME]
"""
import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
# Force UTF-8 output so the emoji used in status messages can be printed on
# Windows consoles. The stream is only switched when it offers `reconfigure`.
os.environ['PYTHONIOENCODING'] = 'utf-8'
_reconfigure_stdout = getattr(sys.stdout, 'reconfigure', None)
if _reconfigure_stdout is not None:
    _reconfigure_stdout(encoding='utf-8')
# Filesystem layout, all derived from the location of this script:
#   SCRIPT_DIR       directory containing this file
#   PROJECT_TRACKER  parent of SCRIPT_DIR (generated files are written below it)
#   CONFIG_FILE      config.json next to this script
#   TEMPLATE_FILE    templates/BASE.md next to this script
SCRIPT_DIR = Path(__file__).parent
PROJECT_TRACKER = SCRIPT_DIR.parent
CONFIG_FILE = SCRIPT_DIR.joinpath("config.json")
TEMPLATE_FILE = SCRIPT_DIR.joinpath("templates", "BASE.md")
# Human-readable label for each category code. The code is the folder name
# found directly under projects/ in a project file path.
CATEGORY_NAMES = {
    "META": "Meta-Project (Infrastructure/Coordination)",
    "CONSTANT": "Constant (Ongoing/Maintenance)",
    "WIP": "Work In Progress (Active Development)",
    "CONCEPT": "Concept (Idea Stage)",
    "PAUSE": "Paused (Temporarily Inactive)",
    "DONE": "Done (Completed/Archived)",
}
# Maps a lower-case file suffix to the label reported in the tech stack.
# Entries are grouped by kind for readability; lookups do not depend on order.
LANGUAGE_EXTENSIONS = {
    # Compiled / systems languages
    ".c": "C",
    ".cpp": "C++",
    ".h": "C/C++ Headers",
    ".hpp": "C++ Headers",
    ".cs": "C#",
    ".go": "Go",
    ".java": "Java",
    ".kt": "Kotlin",
    ".rs": "Rust",
    ".swift": "Swift",
    # Scripting / web languages
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".php": "PHP",
    ".py": "Python",
    ".rb": "Ruby",
    # Documentation and data formats
    ".md": "Markdown",
    ".json": "JSON",
    ".yaml": "YAML",
    ".yml": "YAML",
    # Shell flavours
    ".sh": "Shell Script",
    ".bat": "Batch Script",
    ".ps1": "PowerShell",
}
def load_json(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at ``path`` and return the result.

    Errors from opening the file or from ``json.load`` propagate to the caller.
    """
    with open(path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
def load_template() -> str:
    """Return the complete text of the base template (``TEMPLATE_FILE``).

    The file is decoded as UTF-8; a missing or unreadable template raises
    the underlying ``OSError``.
    """
    template_text = TEMPLATE_FILE.read_text(encoding='utf-8')
    return template_text
def get_category_from_path(project_file: str) -> str:
    """Extract the category folder from a project file path.

    The category is the directory directly below ``projects``. Both forward
    slashes and Windows backslashes are accepted as separators.

    Examples:
        projects/WIP/repo.md -> WIP
        projects/repo.md     -> UNCATEGORIZED (no category folder present)
        docs/WIP/repo.md     -> UNCATEGORIZED (not under projects/)

    Args:
        project_file: Path of the project file relative to the tracker root.

    Returns:
        The category folder name, or ``"UNCATEGORIZED"`` when the path does
        not have the ``projects/<category>/<file>`` shape.
    """
    # Normalise separators so paths written on Windows are recognised too.
    parts = project_file.replace('\\', '/').split('/')
    # At least three components are required: with only two, the second one
    # is the file name itself and must not be mistaken for a category.
    if len(parts) >= 3 and parts[0] == "projects" and parts[1]:
        return parts[1]
    return "UNCATEGORIZED"
def read_file_safe(path: Path, max_lines: int = 50) -> str:
    """Read the first ``max_lines`` lines of a text file without raising on I/O errors.

    Args:
        path: File to read.
        max_lines: Maximum number of lines to return; zero or a negative
            value yields an empty string.

    Returns:
        The leading lines joined with a newline, each with trailing
        whitespace removed, or an empty string when the file cannot be
        opened or read.
    """
    try:
        # 'utf-8-sig' behaves like UTF-8 but drops a leading byte-order mark.
        # Without that, the first line of a BOM-prefixed file would not start
        # with '#', and header detection in the extractors would miss it.
        with open(path, 'r', encoding='utf-8-sig', errors='ignore') as f:
            # zip() against range() stops after max_lines without reading
            # the rest of the file.
            head = [line.rstrip() for _, line in zip(range(max_lines), f)]
    except (OSError, ValueError):
        # Missing file, directory, permission problem or malformed path:
        # treat them all as "no content available".
        return ""
    return '\n'.join(head)
def detect_languages(repo_path: Path) -> List[str]:
    """Return the sorted language labels found under ``repo_path``.

    Every file suffix (lower-cased) is looked up in ``LANGUAGE_EXTENSIONS``.
    VCS, dependency, cache, build and virtual-environment directories are
    not descended into. Any error during the walk is swallowed and the
    labels gathered so far are returned.
    """
    ignored_dirs = {'.git', 'node_modules', '__pycache__', 'build', 'dist', '.venv', 'venv'}
    found = set()
    try:
        for _root, subdirs, filenames in os.walk(repo_path):
            # Prune in place: os.walk only descends into what is left here.
            subdirs[:] = [name for name in subdirs if name not in ignored_dirs]
            suffixes = (Path(filename).suffix.lower() for filename in filenames)
            found.update(
                LANGUAGE_EXTENSIONS[suffix]
                for suffix in suffixes
                if suffix in LANGUAGE_EXTENSIONS
            )
    except Exception:
        pass
    return sorted(found)
def extract_description_from_claude_md(content: str) -> str:
    """Build a short description from the first body text of a Markdown document.

    Leading headers (lines starting with ``#``) and blank lines are skipped.
    The first two non-empty body lines are joined with a space; collection
    stops early when a header follows body text. When no body text exists,
    ``"No description available."`` is returned.
    """
    collected = []
    for raw_line in content.split('\n'):
        text = raw_line.strip()
        if not text:
            # Blank lines neither start nor end the description.
            continue
        if text.startswith('#'):
            if collected:
                # A new section begins after body text: description is done.
                break
            continue
        collected.append(text)
        if len(collected) == 2:
            break
    if not collected:
        return "No description available."
    return ' '.join(collected)
def extract_status_from_claude_md(content: str) -> str:
    """Extract a short status summary from Markdown documentation.

    The text is scanned for the first line that contains a status keyword
    (case-insensitive substring match, English and French spellings). From
    that line on, a window of five lines is inspected and the first two
    non-empty, non-header lines are joined with a space.

    If a keyword line yields no usable text (for example a title followed
    only by blank lines), scanning continues with the next keyword line
    instead of giving up.

    Args:
        content: Markdown text, typically the head of CLAUDE.md.

    Returns:
        The extracted status, or ``"Status unknown - inspect repository."``
        when no keyword line produces any text.
    """
    lines = content.split('\n')
    status_keywords = ('statut', 'status', 'current', 'phase', 'état')

    for i, line in enumerate(lines):
        lower = line.lower()
        if not any(kw in lower for kw in status_keywords):
            continue

        # The window starts at the keyword line itself, so an inline status
        # such as "Status: beta" is captured as well.
        window = (candidate.strip() for candidate in lines[i:i + 5])
        status_lines = [text for text in window if text and not text.startswith('#')][:2]
        if status_lines:
            return ' '.join(status_lines)

    return "Status unknown - inspect repository."
def get_last_commit_info(repo_path: Path) -> Dict[str, str]:
    """Return the relative date and subject line of the latest commit.

    Args:
        repo_path: Root directory of the repository.

    Returns:
        A dict with the keys ``"date"`` and ``"message"``:

        * no ``.git`` entry: ``Unknown`` / ``Not a git repository``
        * git exits non-zero (e.g. no commits yet): ``Unknown`` / ``No commits``
        * git missing, timing out or otherwise failing to run:
          ``Unknown`` / ``Error reading git history``
        * otherwise the ``%ar`` date and ``%s`` subject of ``git log -1``
    """
    try:
        if not (repo_path / ".git").exists():
            return {"date": "Unknown", "message": "Not a git repository"}

        # One invocation returns both fields: relative date, newline, subject.
        # The output is decoded explicitly as UTF-8 so non-ASCII commit
        # subjects survive on Windows, where the default locale codec would
        # mangle them or fail to decode.
        result = subprocess.run(
            ["git", "-C", str(repo_path), "log", "-1", "--format=%ar%n%s"],
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
        )
    except (OSError, TypeError, subprocess.SubprocessError):
        # git not installed, timeout, or an unusable path.
        return {"date": "Unknown", "message": "Error reading git history"}

    if result.returncode != 0:
        return {"date": "Unknown", "message": "No commits"}

    # A commit subject never contains a newline, so the first newline
    # separates the two fields.
    date, _, message = result.stdout.strip().partition('\n')
    return {"date": date.strip(), "message": message.strip()}
def get_directory_structure(repo_path: Path) -> List[str]:
    """List up to ten top-level directory names of ``repo_path``.

    Entries are taken in sorted order; files and names starting with a dot
    are left out. Any error while listing results in an empty list.
    """
    try:
        visible_dirs = [
            entry.name
            for entry in sorted(repo_path.iterdir())
            if entry.is_dir() and not entry.name.startswith('.')
        ]
    except Exception:
        return []
    # Only the first ten directories are reported.
    return visible_dirs[:10]
def inspect_repo(repo_name: str, repo_path: Path) -> Dict[str, Any]:
    """Inspect a repository checkout and collect the facts used for generation.

    Args:
        repo_name: Name of the repository (currently not used for inspection;
            kept for the caller-facing signature).
        repo_path: Expected location of the repository on disk.

    Returns:
        A dict with these keys:

        * ``repo_exists``: whether ``repo_path`` exists
        * ``has_claude_md`` / ``has_readme`` / ``has_todo``: document flags
        * ``claude_content`` / ``readme_content`` / ``todo_content``: the
          first 100 / 50 / 20 lines of the respective document
        * ``languages``: sorted language labels
        * ``last_commit``: dict with ``date`` and ``message``
        * ``directories``: top-level directory names

        When the repository does not exist, only ``repo_exists`` is
        meaningful; the other entries keep their empty defaults
        (``last_commit`` is an empty dict in that case).
    """
    info: Dict[str, Any] = {
        "repo_exists": repo_path.exists(),
        "has_claude_md": False,
        "has_readme": False,
        "has_todo": False,
        "claude_content": "",
        "readme_content": "",
        "todo_content": "",
        "languages": [],
        "last_commit": {},
        "directories": [],
    }

    if not info["repo_exists"]:
        return info

    # Documents are only accepted when they are regular files, so a
    # directory that happens to be called e.g. README.md is not reported
    # as documentation.
    claude_path = repo_path / "CLAUDE.md"
    if claude_path.is_file():
        info["has_claude_md"] = True
        info["claude_content"] = read_file_safe(claude_path, max_lines=100)

    # The first spelling that exists wins.
    for readme_name in ("README.md", "Readme.md", "readme.md"):
        readme_path = repo_path / readme_name
        if readme_path.is_file():
            info["has_readme"] = True
            info["readme_content"] = read_file_safe(readme_path, max_lines=50)
            break

    todo_path = repo_path / "TODO.md"
    if todo_path.is_file():
        info["has_todo"] = True
        info["todo_content"] = read_file_safe(todo_path, max_lines=20)

    info["languages"] = detect_languages(repo_path)
    info["last_commit"] = get_last_commit_info(repo_path)
    info["directories"] = get_directory_structure(repo_path)

    return info
def generate_project_md(repo_name: str, category: str, info: Dict, template: str) -> str:
    """Render the project tracking Markdown for one repository.

    Args:
        repo_name: Repository name; used as project name and repo name.
        category: Category code (looked up in ``CATEGORY_NAMES`` for the
            long label; unknown codes are used verbatim).
        info: Inspection result as produced by ``inspect_repo``.
        template: ``str.format`` template containing the placeholders
            ``project_name``, ``category``, ``category_full``,
            ``project_type``, ``repo_name``, ``description``,
            ``tech_stack``, ``current_status``, ``key_features``,
            ``structure``, ``next_steps``, ``notes``, ``created_date`` and
            ``updated_date``.

    Returns:
        The filled-in template text.

    Raises:
        KeyError / IndexError / ValueError: from ``str.format`` when the
            template contains placeholders or braces it cannot resolve.
    """
    has_claude_md = info["has_claude_md"]
    languages = info["languages"]

    # Description: CLAUDE.md takes precedence over README; the same
    # extraction logic is applied to both.
    if has_claude_md:
        description = extract_description_from_claude_md(info["claude_content"])
    elif info["has_readme"]:
        description = extract_description_from_claude_md(info["readme_content"])
    else:
        description = "No description available - repository needs documentation."

    # Tech stack
    tech_stack = ", ".join(languages) if languages else "Unknown - inspect repository"

    # Current status. For a repository that was not found, inspection
    # leaves "last_commit" empty; indexing it directly would raise KeyError
    # and prevent the file (with its "repository not found" note) from
    # being generated at all.
    last_commit = info.get("last_commit") or {}
    if has_claude_md:
        current_status = extract_status_from_claude_md(info["claude_content"])
    elif last_commit:
        current_status = (
            f"Last commit: {last_commit.get('message', 'Unknown')} "
            f"({last_commit.get('date', 'Unknown')})"
        )
    else:
        current_status = "Status unknown - inspect repository."

    # Key features
    if has_claude_md:
        key_features = "- See CLAUDE.md for detailed features"
    else:
        key_features = "- Features not documented yet"

    # Structure
    if info["directories"]:
        structure = "```\n" + "\n".join(f"{d}/" for d in info["directories"]) + "\n```"
    else:
        structure = "Structure not analyzed yet."

    # Next steps: first five TODO entries, each rendered as one bullet.
    next_steps = "- See TODO.md or project plans for next steps"
    if info["has_todo"]:
        todo_items = []
        for raw_line in info["todo_content"].split('\n'):
            text = raw_line.strip()
            # Headers are filtered on the stripped text so indented ones
            # are caught as well.
            if not text or text.startswith('#'):
                continue
            # Drop an existing list marker; otherwise "- item" would be
            # rendered as "- - item".
            marker, _, rest = text.partition(' ')
            if marker in ('-', '*', '+'):
                text = rest.strip()
            if text:
                todo_items.append(f"- {text}")
        # A TODO file without entries keeps the generic pointer instead of
        # producing an empty section.
        if todo_items:
            next_steps = "\n".join(todo_items[:5])

    # Notes
    notes_parts = []
    if not info["repo_exists"]:
        notes_parts.append("⚠️ Repository not found in expected location")
    if not has_claude_md:
        notes_parts.append("⚠️ No CLAUDE.md found - add project documentation")
    notes = "\n".join(notes_parts) if notes_parts else "None"

    # Project type: the first rule whose languages are present wins, so the
    # order below is the priority order.
    project_type_rules = (
        (("C++", "C"), "C++ Application"),
        (("Python",), "Python Application"),
        (("JavaScript", "TypeScript"), "JavaScript/Node.js Application"),
        (("C#",), "C# Application"),
        (("Rust",), "Rust Application"),
        (("Go",), "Go Application"),
    )
    project_type = next(
        (
            label
            for candidates, label in project_type_rules
            if any(lang in languages for lang in candidates)
        ),
        "Unknown",
    )

    # Fill template
    today = datetime.now().strftime("%Y-%m-%d")
    return template.format(
        project_name=repo_name,
        category=category,
        category_full=CATEGORY_NAMES.get(category, category),
        project_type=project_type,
        repo_name=repo_name,
        description=description,
        tech_stack=tech_stack,
        current_status=current_status,
        key_features=key_features,
        structure=structure,
        next_steps=next_steps,
        notes=notes,
        created_date=today,
        updated_date=today,
    )
def main():
    """Generate the missing project tracking files listed in the config.

    Command line:
        --dry-run     report what would be generated without writing files
        --repo NAME   restrict processing to the repository called NAME

    For every entry of ``project_mapping`` in the config file, the target
    file is skipped when it already exists; otherwise the repository below
    ``repos_root`` is inspected and the rendered template is written (or
    only reported in dry-run mode). Errors for one repository are reported
    and counted without stopping the run. A summary is printed at the end.

    Exits with status 2 when ``--repo`` is given without a name.
    """
    # Parse arguments
    args = sys.argv[1:]
    dry_run = "--dry-run" in args
    specific_repo = None
    if "--repo" in args:
        idx = args.index("--repo")
        # A missing value must not silently fall back to "all repositories":
        # that would generate files the user never asked for.
        if idx + 1 >= len(args) or args[idx + 1].startswith("--"):
            print("❌ --repo requires a repository name")
            sys.exit(2)
        specific_repo = args[idx + 1]

    # Load config
    config = load_json(CONFIG_FILE)
    repos_root = Path(config["repos_root"])
    project_mapping = config["project_mapping"]

    # Load template
    template = load_template()

    # Stats
    stats = {
        "total": 0,
        "generated": 0,
        "skipped": 0,
        "errors": 0
    }

    print("🔍 ProjectTracker - Project File Generator\n")

    # Process each repository
    for repo_name, project_file in project_mapping.items():
        # Filter if specific repo requested
        if specific_repo and repo_name != specific_repo:
            continue

        stats["total"] += 1

        # Check if file already exists
        project_path = PROJECT_TRACKER / project_file
        if project_path.exists():
            print(f"⏭️ {repo_name} - Already exists, skipping")
            stats["skipped"] += 1
            continue

        # Inspect repository
        repo_path = repos_root / repo_name
        print(f"📂 {repo_name} - Inspecting...")

        try:
            info = inspect_repo(repo_name, repo_path)
            category = get_category_from_path(project_file)
            content = generate_project_md(repo_name, category, info, template)

            if dry_run:
                print(f" [DRY RUN] Would generate: {project_file}")
                print(f" Languages: {', '.join(info['languages']) if info['languages'] else 'None'}")
                print(f" Has CLAUDE.md: {info['has_claude_md']}")
            else:
                # Ensure directory exists
                project_path.parent.mkdir(parents=True, exist_ok=True)

                # Write file
                with open(project_path, 'w', encoding='utf-8') as f:
                    f.write(content)

                print(f"{repo_name} - Generated: {project_file}")

            # NOTE(review): the nesting of this counter could not be
            # recovered from the source; it is counted in both modes here,
            # so a dry run reports the files that would be generated.
            # Confirm against the intended behaviour.
            stats["generated"] += 1

        except Exception as e:
            # One failing repository must not abort the whole run.
            print(f"{repo_name} - Error: {e}")
            stats["errors"] += 1

    # A requested repository that is not configured would otherwise only
    # show up as an all-zero summary.
    if specific_repo and stats["total"] == 0:
        print(f"⚠️ {specific_repo} - Not found in project mapping")

    # Summary
    print(f"\n{'='*60}")
    print(f"📊 Summary:")
    print(f" Total repos: {stats['total']}")
    print(f" Generated: {stats['generated']}")
    print(f" Skipped (already exist): {stats['skipped']}")
    print(f" Errors: {stats['errors']}")

    if dry_run:
        print(f"\n💡 This was a dry run. Remove --dry-run to generate files.")
if __name__ == "__main__":
    # Script entry point: run main() and translate Ctrl+C or any unexpected
    # exception into exit status 1. Unexpected exceptions also print a
    # traceback.
    exit_code = 0
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ Interrupted by user")
        exit_code = 1
    except Exception as fatal:
        import traceback

        print(f"\n❌ Fatal error: {fatal}")
        traceback.print_exc()
        exit_code = 1
    if exit_code:
        sys.exit(exit_code)