#!/usr/bin/env python3
"""
Debug script to visualize text placement on PDF.
Creates a PDF with VISIBLE text boxes to check positioning.
"""

import argparse
import json
import os
import tempfile
from pathlib import Path

from PIL import Image
from pdf2image import convert_from_path
from reportlab.lib.colors import red, blue, green
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas


def _bbox_to_pdf_rect(bbox, img_width, img_height, pdf_width, pdf_height):
    """Map a four-point image-space bbox to a PDF-space rectangle.

    Args:
        bbox: Sequence of four ``(x, y)`` points in image pixel coordinates.
        img_width: Width of the rendered page image, in pixels.
        img_height: Height of the rendered page image, in pixels.
        pdf_width: Width of the output PDF page, in points.
        pdf_height: Height of the output PDF page, in points.

    Returns:
        ``(x, y, width, height)`` in PDF points, where ``(x, y)`` is the
        lower-left corner of the box, or ``None`` when ``bbox`` is not four
        indexable points with numeric coordinates.
    """
    if not bbox or len(bbox) != 4:
        return None

    try:
        xs = [float(point[0]) for point in bbox]
        ys = [float(point[1]) for point in bbox]
    except (TypeError, ValueError, LookupError):
        # Not a list of (x, y) points (e.g. a flat [x1, y1, x2, y2] list).
        return None

    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    x = (min_x / img_width) * pdf_width
    width = ((max_x - min_x) / img_width) * pdf_width
    # Image Y grows downwards, PDF Y grows upwards: the bottom edge of the box
    # in the image (max_y) becomes the lower-left Y of the PDF rectangle.
    y = pdf_height - ((max_y / img_height) * pdf_height)
    height = ((max_y - min_y) / img_height) * pdf_height
    return x, y, width, height


def debug_placement(json_path: str, output_path: str, page_num: int = 0) -> None:
    """Create debug PDF with visible text boxes.

    Renders one page of the PDF named by the JSON file's ``"source_pdf"``
    entry as a background image, then overlays every text block listed under
    ``pages[page_num]["details"]``: a red rectangle for its bbox, the text in
    blue at the box's bottom-left, and an index label in green at its top-left.

    Args:
        json_path: Path to the JSON file holding ``"source_pdf"`` and
            ``"pages"``.
        output_path: Path of the debug PDF to write.
        page_num: Zero-based page index. Values outside the rendered page
            range (including negative values) fall back to page 0.

    Raises:
        FileNotFoundError: If ``"source_pdf"`` is missing/empty or does not
            point to an existing file.
        IndexError: If the PDF renders to no pages, or the JSON has no
            ``"pages"`` entry for the selected page.
    """
    json_path = Path(json_path)

    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Path("") is the current directory, which always "exists"; test the raw
    # value explicitly so a missing key is reported instead of slipping through.
    source_value = data.get("source_pdf")
    if not source_value:
        raise FileNotFoundError(
            f"Source PDF not found: no 'source_pdf' entry in {json_path}"
        )
    source_pdf = Path(source_value)
    if not source_pdf.is_file():
        raise FileNotFoundError(f"Source PDF not found: {source_pdf}")

    print("Converting PDF to images...")
    images = convert_from_path(str(source_pdf), dpi=150)
    if not images:
        raise IndexError(f"No pages could be rendered from {source_pdf}")

    if not 0 <= page_num < len(images):
        print(f"Page {page_num} not found, using page 0")
        page_num = 0

    # Validate the JSON side before any output is produced, so a mismatch
    # between the PDF page count and the JSON page list fails with a clear
    # message rather than a bare IndexError halfway through drawing.
    pages = data.get("pages") or []
    if page_num >= len(pages):
        raise IndexError(
            f"No text data for page {page_num} in {json_path} "
            f"({len(pages)} page entries)"
        )
    details = pages[page_num].get("details") or []

    img = images[page_num]
    img_width, img_height = img.size
    print(f"Image size: {img_width} x {img_height}")

    # Create PDF
    c = canvas.Canvas(output_path)

    # Set page size to match image: fixed height of 792 pt, width follows the
    # image aspect ratio.
    aspect_ratio = img_width / img_height
    pdf_height = 792
    pdf_width = pdf_height * aspect_ratio
    c.setPageSize((pdf_width, pdf_height))
    print(f"PDF size: {pdf_width} x {pdf_height}")

    # Reserve a temp file name and close the handle before writing to it; the
    # save happens inside the try so the file is removed even if saving fails.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        tmp_path = tmp.name

    try:
        # Draw image
        img.save(tmp_path, 'PNG')
        img_reader = ImageReader(tmp_path)
        c.drawImage(img_reader, 0, 0, width=pdf_width, height=pdf_height)

        print(f"Found {len(details)} text blocks")

        # Draw text boxes
        for i, detail in enumerate(details):
            text = (detail.get("text") or "").strip()
            if not text:
                continue

            rect = _bbox_to_pdf_rect(
                detail.get("bbox", []),
                img_width, img_height, pdf_width, pdf_height,
            )
            if rect is None:
                continue
            x, y, width, height = rect

            # Draw red rectangle around bbox
            c.setStrokeColor(red)
            c.setLineWidth(1)
            c.rect(x, y, width, height, stroke=1, fill=0)

            # Calculate font size from the box height, then shrink it if the
            # text would overflow the box width.
            font_size = height * 0.8
            text_width = c.stringWidth(text, "Helvetica", font_size)
            if text_width > width:
                font_size = font_size * (width / text_width) * 0.9
            # Always keep at least 1 pt so degenerate (zero-height) boxes do
            # not produce a zero-size font.
            font_size = max(1, font_size)

            # Draw VISIBLE text in blue
            c.setFillColor(blue)
            c.setFont("Helvetica", font_size)
            c.drawString(x, y, text)

            # Draw text in green at top-left for comparison
            c.setFillColor(green)
            small_font = min(8, font_size)
            c.setFont("Helvetica", small_font)
            c.drawString(x, y + height, f"{i}: {text[:10]}")

            if i < 10:  # Print first 10 for debugging
                print(f" Block {i}: '{text[:20]}' at ({x:.1f}, {y:.1f}) size {width:.1f}x{height:.1f} font={font_size:.1f}")

        c.showPage()
        c.save()

        print(f"\nDebug PDF created: {output_path}")
        print("Red boxes = detected text areas")
        print("Blue text = positioned text")
        print("Green text = block numbers")
    finally:
        os.unlink(tmp_path)


def main() -> None:
    """Parse command-line arguments and generate the debug PDF."""
    parser = argparse.ArgumentParser(description="Debug PDF text placement")
    parser.add_argument("-i", "--input", required=True, help="Input JSON file")
    parser.add_argument("-o", "--output", required=True, help="Output debug PDF")
    parser.add_argument("-p", "--page", type=int, default=0, help="Page number (default: 0)")
    args = parser.parse_args()

    debug_placement(args.input, args.output, args.page)


if __name__ == "__main__":
    main()