couple-repo/.claude/skills/pptx/ooxml/scripts/pack.py
StillHammer f5aa93bcbd Initial commit: Couple matters documentation + PowerPoint skill
Documentation personnelle complète
- CLAUDE.md : Instructions compactes et enrichies
- personnalités/ : Profils Alexis, Tingting, Ben, Xiaoxiao + TingtingWork.md
- couple_backlog/ : Historique conflits (16-22 octobre 2025)
- conversation_topics/ : Système suivi sujets actifs
- Projects/ : Analyses techniques et projets
- ToRemember/ : Leadership socratique, suivi conversations
- Promesses_à_tenir.md, observations_patterns.md

PowerPoint skill
- .claude/skills/pptx/ : Skill officiel Anthropic (html2pptx)
- Identité visuelle Tingting : Bordeaux + Or antique + Crème
- Exemple : personnalités/Tingting_Class73_Elegant.pptx

Organisation
- planning/, stratégie/, topics/, plan_discussion/
- .gitignore : node_modules, *.pptx (sauf personnalités/), HTML/JS temp

🎯 Repo propre : 129 fichiers essentiels, 0 dependencies
2025-10-24 14:54:57 +08:00

160 lines
5.5 KiB
Python

#!/usr/bin/env python3
"""
Tool to pack an unpacked Office document directory into a .docx, .pptx, or .xlsx file, removing the pretty-printing whitespace from its XML parts.
Example usage:
python pack.py <input_directory> <office_file> [--force]
"""
import argparse
import shutil
import subprocess
import sys
import tempfile
import defusedxml.minidom
import zipfile
from pathlib import Path
def main():
    """Command-line entry point.

    Reads ``input_directory``, ``output_file`` and the optional ``--force``
    flag from the command line and hands them to ``pack_document``.
    Validation is requested unless ``--force`` is given.  The process exits
    with status 1 when validation fails, and with an ``Error: ...`` message
    when ``pack_document`` raises ``ValueError``.
    """
    cli = argparse.ArgumentParser(description="Pack a directory into an Office file")
    cli.add_argument("input_directory", help="Unpacked Office document directory")
    cli.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)")
    cli.add_argument("--force", action="store_true", help="Skip validation")
    opts = cli.parse_args()

    # Lines shown when the packed file did not survive validation.
    failure_notice = (
        "Contents would produce a corrupt file.",
        "Please validate XML before repacking.",
        "Use --force to skip validation and pack anyway.",
    )

    try:
        packed_ok = pack_document(
            opts.input_directory,
            opts.output_file,
            validate=not opts.force,
        )
        if opts.force:
            # Nothing was checked, so the result is reported as unverified.
            print("Warning: Skipped validation, file may be corrupt", file=sys.stderr)
        elif not packed_ok:
            for notice_line in failure_notice:
                print(notice_line, file=sys.stderr)
            sys.exit(1)
    except ValueError as exc:
        sys.exit(f"Error: {exc}")
def pack_document(input_dir, output_file, validate=False):
    """Pack a directory into an Office file (.docx/.pptx/.xlsx).

    The input directory is never modified: it is copied to a scratch
    directory, the XML parts are condensed there, and the archive is
    assembled (and optionally validated) in that scratch directory.  Only a
    complete archive that passed validation is copied to ``output_file``, so
    a failed repack neither leaves a corrupt file behind nor destroys a file
    that already existed at that path.

    Archive members are written in sorted path order so that packing the
    same directory twice yields the same entry order.

    Args:
        input_dir: Path to unpacked Office document directory
        output_file: Path to output Office file
        validate: If True, validates with soffice (default: False)

    Returns:
        bool: True if successful, False if validation failed

    Raises:
        ValueError: If ``input_dir`` is not a directory or ``output_file``
            does not end in .docx, .pptx, or .xlsx.
    """
    input_dir = Path(input_dir)
    output_file = Path(output_file)
    if not input_dir.is_dir():
        raise ValueError(f"{input_dir} is not a directory")
    if output_file.suffix.lower() not in {".docx", ".pptx", ".xlsx"}:
        raise ValueError(f"{output_file} must be a .docx, .pptx, or .xlsx file")

    # Work in a temporary directory to avoid modifying the original
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_content_dir = Path(temp_dir) / "content"
        shutil.copytree(input_dir, temp_content_dir)

        # Process XML files to remove pretty-printing whitespace
        for pattern in ("*.xml", "*.rels"):
            for xml_file in temp_content_dir.rglob(pattern):
                condense_xml(xml_file)

        # Build the archive next to the scratch content, keeping the final
        # file name so the validator sees the real suffix and stem.
        staged_dir = Path(temp_dir) / "staged"
        staged_dir.mkdir()
        staged_file = staged_dir / output_file.name

        # Sort by POSIX-style archive name for a platform-independent,
        # reproducible member order.
        members = sorted(
            (path.relative_to(temp_content_dir).as_posix(), path)
            for path in temp_content_dir.rglob("*")
            if path.is_file()
        )
        with zipfile.ZipFile(staged_file, "w", zipfile.ZIP_DEFLATED) as zf:
            for arcname, path in members:
                zf.write(path, arcname)

        # Validate the staged archive; the destination is untouched on failure
        if validate and not validate_document(staged_file):
            return False

        output_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(staged_file, output_file)
    return True
def validate_document(doc_path, timeout=10):
    """Validate a document by converting it to HTML with soffice.

    The document counts as valid when ``soffice --headless --convert-to``
    produces ``<stem>.html`` in a temporary output directory.

    Args:
        doc_path: Path to the .docx, .pptx, or .xlsx file to check.
        timeout: Seconds to wait for the soffice conversion (default: 10).

    Returns:
        bool: True if the HTML output was produced, or if soffice is not
        installed (validation is then skipped with a warning).  False if the
        file type is unsupported, the conversion produced no output, timed
        out, or failed for any other reason; the cause is printed to stderr.
    """
    doc_path = Path(doc_path)
    suffix = doc_path.suffix.lower()

    # Export filter to request from soffice for each supported extension
    filter_name = {
        ".docx": "html:HTML",
        ".pptx": "html:impress_html_Export",
        ".xlsx": "html:HTML (StarCalc)",
    }.get(suffix)
    if filter_name is None:
        # Report the real cause instead of failing later on an unbound name
        print(
            f"Validation error: unsupported file type '{suffix}'",
            file=sys.stderr,
        )
        return False

    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            result = subprocess.run(
                [
                    "soffice",
                    "--headless",
                    "--convert-to",
                    filter_name,
                    "--outdir",
                    temp_dir,
                    str(doc_path),
                ],
                capture_output=True,
                timeout=timeout,
                text=True,
            )
            # Success is judged by the presence of the converted file
            if not (Path(temp_dir) / f"{doc_path.stem}.html").exists():
                error_msg = result.stderr.strip() or "Document validation failed"
                print(f"Validation error: {error_msg}", file=sys.stderr)
                return False
            return True
        except FileNotFoundError:
            # No soffice executable: treat as "cannot validate", not "invalid"
            print("Warning: soffice not found. Skipping validation.", file=sys.stderr)
            return True
        except subprocess.TimeoutExpired:
            print("Validation error: Timeout during conversion", file=sys.stderr)
            return False
        except Exception as e:
            # Deliberate best-effort: any other failure means "not validated"
            print(f"Validation error: {e}", file=sys.stderr)
            return False
def condense_xml(xml_file):
    """Strip pretty-printing whitespace and comments from an XML file in place.

    For every element, child text nodes that consist solely of XML
    whitespace (space, tab, CR, LF) and child comment nodes are removed.
    Elements whose whitespace is content are left completely untouched:

    * text runs named ``t`` with or without a namespace prefix
      (``w:t``, ``a:t``, and the unprefixed ``<t>`` used when the part
      declares a default namespace), and
    * any element carrying ``xml:space="preserve"`` itself.

    The file is rewritten as UTF-8.

    Args:
        xml_file: Path of the XML (or .rels) file to condense.
    """
    # Binary mode lets the parser choose the encoding from the BOM / XML
    # declaration instead of assuming the part is UTF-8.
    with open(xml_file, "rb") as f:
        dom = defusedxml.minidom.parse(f)

    # Process each element to remove whitespace and comments
    for element in dom.getElementsByTagName("*"):
        tag = element.tagName
        # Skip text-run elements and anything that asks to keep its whitespace
        if (
            tag == "t"
            or tag.endswith(":t")
            or element.getAttribute("xml:space") == "preserve"
        ):
            continue
        # Iterate over a copy because children are removed during the loop
        for child in list(element.childNodes):
            if child.nodeType == child.COMMENT_NODE:
                element.removeChild(child)
            elif (
                child.nodeType == child.TEXT_NODE
                and child.nodeValue
                # Only XML whitespace counts; a bare str.strip() would also
                # discard characters such as a non-breaking space.
                and child.nodeValue.strip(" \t\r\n") == ""
            ):
                element.removeChild(child)

    # Write back the condensed XML
    with open(xml_file, "wb") as f:
        f.write(dom.toxml(encoding="UTF-8"))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()