# chineseclass/debug_pdf_placement.py

#!/usr/bin/env python3
"""
Debug script to visualize text placement on PDF.
Creates a PDF with VISIBLE text boxes to check positioning.
"""

import json
import argparse
from pathlib import Path
from PIL import Image
from pdf2image import convert_from_path
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.lib.colors import red, blue, green
import tempfile
import os


def _bbox_to_pdf_rect(bbox, img_size, pdf_size):
    """Map a polygon given in image pixels to a PDF rectangle.

    Args:
        bbox: Sequence of points; each point is indexed as ``point[0]`` (x)
            and ``point[1]`` (y).
        img_size: ``(width, height)`` of the rendered page image in pixels.
        pdf_size: ``(width, height)`` of the PDF page in points.

    Returns:
        ``(x, y, width, height)`` of the axis-aligned rectangle enclosing the
        polygon, in PDF coordinates, where ``(x, y)`` is the lower-left
        corner of the rectangle.
    """
    img_width, img_height = img_size
    pdf_width, pdf_height = pdf_size

    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    x = (min_x / img_width) * pdf_width
    width = ((max_x - min_x) / img_width) * pdf_width

    # The y axis is flipped between the two systems, so the box's lower edge
    # in the PDF comes from the polygon's largest image-space y value.
    y = pdf_height - ((max_y / img_height) * pdf_height)
    height = ((max_y - min_y) / img_height) * pdf_height

    return x, y, width, height


def _fit_font_size(pdf_canvas, text, font_name, width, height):
    """Return a font size (>= 1) that keeps *text* within a width x height box."""
    # Start from 80% of the box height, then shrink if the string is too wide.
    font_size = height * 0.8
    text_width = pdf_canvas.stringWidth(text, font_name, font_size)
    if text_width > width:
        # The extra 0.9 factor leaves a small margin inside the box.
        font_size = font_size * (width / text_width) * 0.9
    return max(1, font_size)


def debug_placement(json_path: str, output_path: str, page_num: int = 0,
                    *, font_name: str = "Helvetica"):
    """Create a debug PDF with visible text boxes for one page.

    The page of the source PDF is rendered to an image and used as the
    background. For every entry in the page's ``details`` list that has
    non-empty ``text`` and a four-point ``bbox``, the function draws a red
    rectangle, the text in blue fitted to the rectangle, and a green
    ``index: text`` label at the rectangle's top edge.

    Args:
        json_path: Path to the JSON file. It is read for the keys
            ``source_pdf`` and ``pages``; each page entry is read for
            ``details``, and each detail for ``text`` and ``bbox``.
        output_path: Where the debug PDF is written.
        page_num: Zero-based page index. If it is outside the range of pages
            rendered from the source PDF (including negative values), page 0
            is used instead and a notice is printed.
        font_name: Font used for the overlaid text. It must be a font name
            that ReportLab already knows about.
            NOTE(review): the default Helvetica is a Latin font; CJK text is
            unlikely to render with it -- confirm and pass a registered CJK
            font if the blue/green text comes out blank.

    Raises:
        FileNotFoundError: If ``source_pdf`` is missing from the JSON or does
            not point to an existing file.
        IndexError: If the JSON ``pages`` list has no entry for the selected
            page.
    """
    json_path = Path(json_path)
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # A missing/empty "source_pdf" becomes Path("."), which *exists* (it is
    # the current directory); is_file() rejects it as well as real misses.
    source_pdf = Path(data.get("source_pdf") or "")
    if not source_pdf.is_file():
        raise FileNotFoundError(f"Source PDF not found: {source_pdf}")

    print("Converting PDF to images...")
    images = convert_from_path(str(source_pdf), dpi=150)

    # Negative indices would silently wrap around to the end of the list, so
    # treat them like any other out-of-range request.
    if not 0 <= page_num < len(images):
        print(f"Page {page_num} not found, using page 0")
        page_num = 0

    # Validate the JSON side before creating any output or temp files.
    pages = data.get("pages") or []
    if page_num >= len(pages):
        raise IndexError(
            f"JSON has no entry for page {page_num} "
            f"(found {len(pages)} page(s))"
        )
    details = pages[page_num].get("details", [])

    img = images[page_num]
    img_width, img_height = img.size
    print(f"Image size: {img_width} x {img_height}")

    # Page height is fixed at 792 pt (11 in at 72 pt/in); the width follows
    # the image's aspect ratio so the background is not distorted.
    c = canvas.Canvas(output_path)
    pdf_height = 792
    pdf_width = pdf_height * (img_width / img_height)
    c.setPageSize((pdf_width, pdf_height))
    print(f"PDF size: {pdf_width} x {pdf_height}")

    # Reserve a temp file name and close the handle immediately; the image is
    # written inside the try block so the file is removed even if saving
    # fails.
    fd, tmp_path = tempfile.mkstemp(suffix='.png')
    os.close(fd)

    try:
        img.save(tmp_path, 'PNG')
        c.drawImage(ImageReader(tmp_path), 0, 0,
                    width=pdf_width, height=pdf_height)

        print(f"Found {len(details)} text blocks")

        for i, detail in enumerate(details):
            text = (detail.get("text") or "").strip()
            if not text:
                continue

            # Expected shape: four [x, y] points in image pixel coordinates.
            bbox = detail.get("bbox", [])
            if not bbox or len(bbox) != 4:
                continue

            x, y, width, height = _bbox_to_pdf_rect(
                bbox, (img_width, img_height), (pdf_width, pdf_height)
            )

            # Red outline of the detected area.
            c.setStrokeColor(red)
            c.setLineWidth(1)
            c.rect(x, y, width, height, stroke=1, fill=0)

            font_size = _fit_font_size(c, text, font_name, width, height)

            # Blue: the text as it would be positioned in the box.
            c.setFillColor(blue)
            c.setFont(font_name, font_size)
            c.drawString(x, y, text)

            # Green: block index plus a short text prefix at the top edge.
            c.setFillColor(green)
            c.setFont(font_name, min(8, font_size))
            c.drawString(x, y + height, f"{i}: {text[:10]}")

            if i < 10:  # Print first 10 for debugging
                print(f"  Block {i}: '{text[:20]}' at ({x:.1f}, {y:.1f}) size {width:.1f}x{height:.1f} font={font_size:.1f}")

        c.showPage()
        c.save()

        print(f"\nDebug PDF created: {output_path}")
        print("Red boxes = detected text areas")
        print("Blue text = positioned text")
        print("Green text = block numbers")

    finally:
        os.unlink(tmp_path)


if __name__ == "__main__":
    # Command-line entry point: read the three options and hand them to
    # debug_placement().
    cli = argparse.ArgumentParser(description="Debug PDF text placement")
    cli.add_argument(
        "-i", "--input",
        required=True,
        help="Input JSON file",
    )
    cli.add_argument(
        "-o", "--output",
        required=True,
        help="Output debug PDF",
    )
    cli.add_argument(
        "-p", "--page",
        type=int,
        default=0,
        help="Page number (default: 0)",
    )

    options = cli.parse_args()
    debug_placement(
        json_path=options.input,
        output_path=options.output,
        page_num=options.page,
    )