Created automated generator for project tracking .md files.

Features:
- Inspects repositories to extract information
- Detects tech stack from file extensions
- Extracts descriptions from CLAUDE.md/README.md
- Extracts status from documentation
- Gets git history (last commit info)
- Analyzes directory structure
- Generates .md files from template

Usage:
    python tools/generate_project_files.py                  # Generate all missing
    python tools/generate_project_files.py --dry-run        # Preview only
    python tools/generate_project_files.py --repo RepoName  # Single repo

Template system:
- BASE.md template with category-specific labels
- UTF-8/emoji support for Windows
- Extensible for future enhancements

Part of Gap 3 resolution (missing project tracking files).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
454 lines
14 KiB
Python
#!/usr/bin/env python3
"""
Generate missing project tracking .md files by inspecting repositories.

Usage:
    python tools/generate_project_files.py [--dry-run] [--repo REPO_NAME]
"""

import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List

# Force UTF-8 encoding for Windows console (emoji support)
os.environ['PYTHONIOENCODING'] = 'utf-8'
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# Paths
SCRIPT_DIR = Path(__file__).parent
PROJECT_TRACKER = SCRIPT_DIR.parent
CONFIG_FILE = SCRIPT_DIR / "config.json"
TEMPLATE_FILE = SCRIPT_DIR / "templates" / "BASE.md"

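# Note: config.json is a separate file in tools/ and is not shown here. Based on the
# keys read in main() below (config["repos_root"] and config["project_mapping"]) and the
# path format expected by get_category_from_path(), a minimal, purely illustrative
# sketch of its shape would look like this (repo name and paths are placeholders):
#
#   {
#     "repos_root": "C:/path/to/repos",
#     "project_mapping": {
#       "SomeRepo": "projects/WIP/SomeRepo.md"
#     }
#   }
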
# Category names
CATEGORY_NAMES = {
    "META": "Meta-Project (Infrastructure/Coordination)",
    "CONSTANT": "Constant (Ongoing/Maintenance)",
    "WIP": "Work In Progress (Active Development)",
    "CONCEPT": "Concept (Idea Stage)",
    "PAUSE": "Paused (Temporarily Inactive)",
    "DONE": "Done (Completed/Archived)"
}

# Language detection
LANGUAGE_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".cpp": "C++",
    ".c": "C",
    ".h": "C/C++ Headers",
    ".hpp": "C++ Headers",
    ".java": "Java",
    ".cs": "C#",
    ".go": "Go",
    ".rs": "Rust",
    ".rb": "Ruby",
    ".php": "PHP",
    ".swift": "Swift",
    ".kt": "Kotlin",
    ".md": "Markdown",
    ".json": "JSON",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".sh": "Shell Script",
    ".bat": "Batch Script",
    ".ps1": "PowerShell",
}


def load_json(path: Path) -> dict:
    """Load JSON file"""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_template() -> str:
    """Load base template"""
    with open(TEMPLATE_FILE, 'r', encoding='utf-8') as f:
        return f.read()


def get_category_from_path(project_file: str) -> str:
    """Extract category from project file path

    Example: projects/WIP/repo.md -> WIP
    """
    parts = project_file.split('/')
    if len(parts) >= 2 and parts[0] == "projects":
        return parts[1]
    return "UNCATEGORIZED"

def read_file_safe(path: Path, max_lines: int = 50) -> str:
    """Read file with error handling"""
    try:
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = []
            for i, line in enumerate(f):
                if i >= max_lines:
                    break
                lines.append(line.rstrip())
            return '\n'.join(lines)
    except Exception:
        return ""


def detect_languages(repo_path: Path) -> List[str]:
    """Detect programming languages from file extensions"""
    languages = set()

    try:
        for root, dirs, files in os.walk(repo_path):
            # Skip common ignore directories
            dirs[:] = [d for d in dirs if d not in {'.git', 'node_modules', '__pycache__', 'build', 'dist', '.venv', 'venv'}]

            for file in files:
                ext = Path(file).suffix.lower()
                if ext in LANGUAGE_EXTENSIONS:
                    languages.add(LANGUAGE_EXTENSIONS[ext])
    except Exception:
        pass

    return sorted(languages)

def extract_description_from_claude_md(content: str) -> str:
    """Extract description from CLAUDE.md"""
    lines = content.split('\n')

    # Look for first heading or first paragraph
    description_lines = []
    in_description = False

    for line in lines:
        stripped = line.strip()

        # Skip title/headers until we find content
        if stripped.startswith('#'):
            if description_lines:  # If we already have content, stop at next header
                break
            continue

        # Skip empty lines at start
        if not stripped and not description_lines:
            continue

        # Collect content
        if stripped:
            description_lines.append(stripped)
            if len(description_lines) >= 3:  # First 3 non-empty lines
                break

    return ' '.join(description_lines[:2]) if description_lines else "No description available."


def extract_status_from_claude_md(content: str) -> str:
    """Extract current status from CLAUDE.md"""
    lines = content.split('\n')

    # Look for status-related sections
    status_keywords = ['statut', 'status', 'current', 'phase', 'état']
    status_lines = []

    for i, line in enumerate(lines):
        lower = line.lower()

        # Check if line contains status keyword
        if any(kw in lower for kw in status_keywords):
            # Collect next few non-empty lines
            for j in range(i, min(i + 5, len(lines))):
                stripped = lines[j].strip()
                if stripped and not stripped.startswith('#'):
                    status_lines.append(stripped)
                    if len(status_lines) >= 2:
                        break
            break

    return ' '.join(status_lines) if status_lines else "Status unknown - inspect repository."

def get_last_commit_info(repo_path: Path) -> Dict[str, str]:
    """Get last commit date and message"""
    try:
        # Check if it's a git repo
        git_dir = repo_path / ".git"
        if not git_dir.exists():
            return {"date": "Unknown", "message": "Not a git repository"}

        # Get last commit date
        result = subprocess.run(
            ["git", "-C", str(repo_path), "log", "-1", "--format=%ar"],
            capture_output=True,
            text=True,
            timeout=5
        )
        date = result.stdout.strip() if result.returncode == 0 else "Unknown"

        # Get last commit message
        result = subprocess.run(
            ["git", "-C", str(repo_path), "log", "-1", "--format=%s"],
            capture_output=True,
            text=True,
            timeout=5
        )
        message = result.stdout.strip() if result.returncode == 0 else "No commits"

        return {"date": date, "message": message}
    except Exception:
        return {"date": "Unknown", "message": "Error reading git history"}


def get_directory_structure(repo_path: Path) -> List[str]:
    """Get high-level directory structure"""
    try:
        dirs = []
        for item in sorted(repo_path.iterdir()):
            if item.is_dir() and not item.name.startswith('.'):
                dirs.append(item.name)
        return dirs[:10]  # Top 10 directories
    except Exception:
        return []

def inspect_repo(repo_name: str, repo_path: Path) -> Dict[str, Any]:
    """Inspect repository and extract information"""

    info = {
        "repo_exists": repo_path.exists(),
        "has_claude_md": False,
        "has_readme": False,
        "has_todo": False,
        "claude_content": "",
        "readme_content": "",
        "todo_content": "",
        "languages": [],
        "last_commit": {},
        "directories": []
    }

    if not repo_path.exists():
        return info

    # Check for CLAUDE.md
    claude_path = repo_path / "CLAUDE.md"
    if claude_path.exists():
        info["has_claude_md"] = True
        info["claude_content"] = read_file_safe(claude_path, max_lines=100)

    # Check for README
    for readme_name in ["README.md", "Readme.md", "readme.md"]:
        readme_path = repo_path / readme_name
        if readme_path.exists():
            info["has_readme"] = True
            info["readme_content"] = read_file_safe(readme_path, max_lines=50)
            break

    # Check for TODO.md
    todo_path = repo_path / "TODO.md"
    if todo_path.exists():
        info["has_todo"] = True
        info["todo_content"] = read_file_safe(todo_path, max_lines=20)

    # Detect languages
    info["languages"] = detect_languages(repo_path)

    # Get last commit
    info["last_commit"] = get_last_commit_info(repo_path)

    # Get directory structure
    info["directories"] = get_directory_structure(repo_path)

    return info

def generate_project_md(repo_name: str, category: str, info: Dict, template: str) -> str:
    """Generate project .md content from template and extracted info"""

    # Extract description
    if info["has_claude_md"]:
        description = extract_description_from_claude_md(info["claude_content"])
    elif info["has_readme"]:
        description = extract_description_from_claude_md(info["readme_content"])  # Same logic works
    else:
        description = "No description available - repository needs documentation."

    # Tech stack
    if info["languages"]:
        tech_stack = ", ".join(info["languages"])
    else:
        tech_stack = "Unknown - inspect repository"

    # Current status
    if info["has_claude_md"]:
        current_status = extract_status_from_claude_md(info["claude_content"])
    else:
        last_commit = info["last_commit"]
        current_status = f"Last commit: {last_commit['message']} ({last_commit['date']})"

    # Key features
    key_features = "- See CLAUDE.md for detailed features" if info["has_claude_md"] else "- Features not documented yet"

    # Structure
    if info["directories"]:
        structure = "```\n" + "\n".join([f"{d}/" for d in info["directories"]]) + "\n```"
    else:
        structure = "Structure not analyzed yet."

    # Next steps
    if info["has_todo"]:
        todo_lines = [line.strip() for line in info["todo_content"].split('\n') if line.strip() and not line.startswith('#')]
        next_steps = "\n".join([f"- {line}" for line in todo_lines[:5]])
    else:
        next_steps = "- See TODO.md or project plans for next steps"

    # Notes
    notes_parts = []
    if not info["repo_exists"]:
        notes_parts.append("⚠️ Repository not found in expected location")
    if not info["has_claude_md"]:
        notes_parts.append("⚠️ No CLAUDE.md found - add project documentation")

    notes = "\n".join(notes_parts) if notes_parts else "None"

    # Project type (infer from languages/structure - prioritize main language)
    project_type = "Unknown"
    lang_list = info["languages"]

    # Prioritize based on typical primary languages
    if "C++" in lang_list or "C" in lang_list:
        project_type = "C++ Application"
    elif "Python" in lang_list:
        project_type = "Python Application"
    elif "JavaScript" in lang_list or "TypeScript" in lang_list:
        project_type = "JavaScript/Node.js Application"
    elif "C#" in lang_list:
        project_type = "C# Application"
    elif "Rust" in lang_list:
        project_type = "Rust Application"
    elif "Go" in lang_list:
        project_type = "Go Application"

    # Fill template
    today = datetime.now().strftime("%Y-%m-%d")
    content = template.format(
        project_name=repo_name,
        category=category,
        category_full=CATEGORY_NAMES.get(category, category),
        project_type=project_type,
        repo_name=repo_name,
        description=description,
        tech_stack=tech_stack,
        current_status=current_status,
        key_features=key_features,
        structure=structure,
        next_steps=next_steps,
        notes=notes,
        created_date=today,
        updated_date=today
    )

    return content

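# Note: templates/BASE.md is a separate file and is not shown here. For reference,
# generate_project_md() above fills exactly these placeholders, so any compatible
# template must define all of them. The layout below is a purely illustrative
# skeleton (not the real template), kept only to document the required fields:
#
#   # {project_name}
#   **Category:** {category} - {category_full}
#   **Type:** {project_type} | **Repository:** {repo_name}
#
#   ## Description
#   {description}
#
#   ## Tech Stack
#   {tech_stack}
#
#   ## Current Status
#   {current_status}
#
#   ## Key Features
#   {key_features}
#
#   ## Structure
#   {structure}
#
#   ## Next Steps
#   {next_steps}
#
#   ## Notes
#   {notes}
#
#   Created: {created_date} | Updated: {updated_date}
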
def main():
    """Main function"""

    # Parse arguments
    dry_run = "--dry-run" in sys.argv
    specific_repo = None
    if "--repo" in sys.argv:
        idx = sys.argv.index("--repo")
        if idx + 1 < len(sys.argv):
            specific_repo = sys.argv[idx + 1]

    # Load config
    config = load_json(CONFIG_FILE)
    repos_root = Path(config["repos_root"])
    project_mapping = config["project_mapping"]

    # Load template
    template = load_template()

    # Stats
    stats = {
        "total": 0,
        "generated": 0,
        "skipped": 0,
        "errors": 0
    }

    print("🔍 ProjectTracker - Project File Generator\n")

    # Process each repository
    for repo_name, project_file in project_mapping.items():
        # Filter if specific repo requested
        if specific_repo and repo_name != specific_repo:
            continue

        stats["total"] += 1

        # Check if file already exists
        project_path = PROJECT_TRACKER / project_file
        if project_path.exists():
            print(f"⏭️ {repo_name} - Already exists, skipping")
            stats["skipped"] += 1
            continue

        # Inspect repository
        repo_path = repos_root / repo_name
        print(f"📂 {repo_name} - Inspecting...")

        try:
            info = inspect_repo(repo_name, repo_path)
            category = get_category_from_path(project_file)
            content = generate_project_md(repo_name, category, info, template)

            if dry_run:
                print(f"  [DRY RUN] Would generate: {project_file}")
                print(f"  Languages: {', '.join(info['languages']) if info['languages'] else 'None'}")
                print(f"  Has CLAUDE.md: {info['has_claude_md']}")
            else:
                # Ensure directory exists
                project_path.parent.mkdir(parents=True, exist_ok=True)

                # Write file
                with open(project_path, 'w', encoding='utf-8') as f:
                    f.write(content)

                print(f"✅ {repo_name} - Generated: {project_file}")
                stats["generated"] += 1

        except Exception as e:
            print(f"❌ {repo_name} - Error: {e}")
            stats["errors"] += 1

    # Summary
    print(f"\n{'='*60}")
    print("📊 Summary:")
    print(f"  Total repos: {stats['total']}")
    print(f"  Generated: {stats['generated']}")
    print(f"  Skipped (already exist): {stats['skipped']}")
    print(f"  Errors: {stats['errors']}")

    if dry_run:
        print("\n💡 This was a dry run. Remove --dry-run to generate files.")

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ Interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Fatal error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)