chineseclass/tools/debug_pdf_placement.py
StillHammer a61a32b57f Reorganize repository structure
- Move all Python scripts to tools/ directory
- Move documentation files to docs/ directory
- Create exams/ and homework/ directories for future use
- Remove temporary test file (page1_preview.png)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-27 23:28:39 +08:00

145 lines
4.4 KiB
Python

#!/usr/bin/env python3
"""
Debug script to visualize text placement on PDF.
Creates a PDF with VISIBLE text boxes to check positioning.
"""
import json
import argparse
from pathlib import Path
from PIL import Image
from pdf2image import convert_from_path
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.lib.colors import red, blue, green
import tempfile
import os
def _bbox_to_pdf_rect(bbox, img_size, pdf_size):
    """Map a four-point bbox to a PDF-space ``(x, y, width, height)`` tuple.

    The bbox points are treated as ``[x, y]`` pairs measured in the rendered
    image, where y grows downward. PDF user space has its origin at the
    bottom-left, so the returned ``y`` is the bottom edge of the box.
    """
    img_width, img_height = img_size
    pdf_width, pdf_height = pdf_size

    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    x = (min_x / img_width) * pdf_width
    width = ((max_x - min_x) / img_width) * pdf_width
    # Y coordinate is inverted: the largest image y becomes the lowest PDF y.
    y = pdf_height - ((max_y / img_height) * pdf_height)
    height = ((max_y - min_y) / img_height) * pdf_height
    return x, y, width, height


def debug_placement(json_path: str, output_path: str, page_num: int = 0):
    """Create a debug PDF with visible text boxes for one page.

    The JSON file is expected to provide ``"source_pdf"`` (path of the PDF
    to render) and ``"pages"``, a list whose entries carry a ``"details"``
    list of blocks with ``"text"`` and a four-point ``"bbox"``. The selected
    page is rendered as the background, then for every non-empty block a red
    rectangle, the text in blue and a short green index label are drawn.

    Args:
        json_path: Path of the JSON file describing the detected text.
        output_path: Path of the debug PDF to write.
        page_num: Zero-based page index. If it is past the last rendered
            page, page 0 is used instead.

    Raises:
        FileNotFoundError: If ``"source_pdf"`` is missing, empty, or does
            not point to an existing file.
        IndexError: If the JSON holds no data for the selected page.
    """
    json_path = Path(json_path)
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # An empty path must be rejected explicitly: Path("") resolves to the
    # current directory, which exists and would slip past an exists() check.
    raw_source = data.get("source_pdf") or ""
    if not raw_source:
        raise FileNotFoundError(f"Source PDF not specified in {json_path}")
    source_pdf = Path(raw_source)
    if not source_pdf.is_file():
        raise FileNotFoundError(f"Source PDF not found: {source_pdf}")

    print("Converting PDF to images...")
    images = convert_from_path(str(source_pdf), dpi=150)

    if page_num >= len(images):
        print(f"Page {page_num} not found, using page 0")
        page_num = 0

    # Fail early, with context, when the JSON has fewer pages than the PDF.
    pages = data.get("pages", [])
    try:
        page_data = pages[page_num]
    except IndexError:
        raise IndexError(
            f"No JSON data for page {page_num} "
            f"({len(pages)} page(s) available in {json_path})"
        ) from None

    img = images[page_num]
    img_width, img_height = img.size
    print(f"Image size: {img_width} x {img_height}")

    # Create PDF
    c = canvas.Canvas(output_path)

    # Set page size to match the image's aspect ratio; the height is fixed
    # at 792 pt (11 in at 72 pt/in) and the width is derived from it.
    aspect_ratio = img_width / img_height
    pdf_height = 792
    pdf_width = pdf_height * aspect_ratio
    c.setPageSize((pdf_width, pdf_height))
    print(f"PDF size: {pdf_width} x {pdf_height}")

    tmp_path = None
    try:
        # The temp file is created inside the try so that a failing save
        # cannot leave it behind.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            tmp_path = tmp.name
            img.save(tmp.name, 'PNG')

        # Draw the rendered page as the background.
        img_reader = ImageReader(tmp_path)
        c.drawImage(img_reader, 0, 0, width=pdf_width, height=pdf_height)

        details = page_data.get("details", [])
        print(f"Found {len(details)} text blocks")

        # Draw text boxes
        for i, detail in enumerate(details):
            text = detail.get("text", "").strip()
            if not text:
                continue

            bbox = detail.get("bbox", [])
            if not bbox or len(bbox) != 4:
                continue

            x, y, width, height = _bbox_to_pdf_rect(
                bbox, (img_width, img_height), (pdf_width, pdf_height)
            )

            # Draw red rectangle around bbox
            c.setStrokeColor(red)
            c.setLineWidth(1)
            c.rect(x, y, width, height, stroke=1, fill=0)

            # Start from 80% of the box height, then shrink to fit the width.
            font_size = height * 0.8
            text_width = c.stringWidth(text, "Helvetica", font_size)
            if text_width > width:
                font_size = font_size * (width / text_width) * 0.9
            # Always keep at least 1 pt so degenerate boxes stay drawable.
            font_size = max(1, font_size)

            # Draw VISIBLE text in blue, baseline on the box's bottom edge.
            # NOTE(review): Helvetica is a Latin font; confirm it can render
            # the characters found in the input.
            c.setFillColor(blue)
            c.setFont("Helvetica", font_size)
            c.drawString(x, y, text)

            # Draw the block index and a text prefix in green at the top edge.
            c.setFillColor(green)
            small_font = min(8, font_size)
            c.setFont("Helvetica", small_font)
            c.drawString(x, y + height, f"{i}: {text[:10]}")

            if i < 10:  # Print first 10 for debugging
                print(f"  Block {i}: '{text[:20]}' at ({x:.1f}, {y:.1f}) size {width:.1f}x{height:.1f} font={font_size:.1f}")

        c.showPage()
        c.save()

        print(f"\nDebug PDF created: {output_path}")
        print("Red boxes = detected text areas")
        print("Blue text = positioned text")
        print("Green text = block numbers")
    finally:
        if tmp_path is not None:
            os.unlink(tmp_path)
if __name__ == "__main__":
    # Command-line entry point: read the three options and render the
    # debug PDF for the requested page.
    cli = argparse.ArgumentParser(description="Debug PDF text placement")
    option_table = (
        (("-i", "--input"), {"required": True, "help": "Input JSON file"}),
        (("-o", "--output"), {"required": True, "help": "Output debug PDF"}),
        (("-p", "--page"), {"type": int, "default": 0, "help": "Page number (default: 0)"}),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()
    debug_placement(
        json_path=options.input,
        output_path=options.output,
        page_num=options.page,
    )