#!/usr/bin/env python3
"""
Draw actual quadrilaterals (not rectangles) to match rotated text bboxes.

The script renders the first page of a PDF to an image, uses that image as
the page background of a new PDF, and overlays every OCR detail from the
matching JSON file: the four-point bbox is stroked in red and the recognised
text is drawn in blue at the bbox's fourth point.
"""

import json
import os
import tempfile
from pathlib import Path

from PIL import Image
from pdf2image import convert_from_path
from reportlab.lib.colors import red, blue
from reportlab.lib.utils import ImageReader
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

# Register Chinese font. This is best-effort: when the font file is missing
# or unreadable (e.g. not running on Windows) fall back to Helvetica, but say
# so, because the fallback font is unlikely to render the Chinese text.
try:
    pdfmetrics.registerFont(TTFont('ChineseFont', 'C:/Windows/Fonts/msyh.ttc'))
    font_name = 'ChineseFont'
except Exception as exc:
    print(f"Warning: could not register Chinese font ({exc}); using Helvetica")
    font_name = 'Helvetica'

# Paths
json_path = "Raw/DevelopChinese/OCR/听力Intro.json"
pdf_path = "Raw/DevelopChinese/PDF/听力Intro.pdf"
output_path = "test_QUADRILATERAL.pdf"


def _to_pdf_points(bbox, page_height):
    """Convert a 4-point image-space bbox to PDF-space (x, y) tuples.

    bbox is expected to be [[x0,y0], [x1,y1], [x2,y2], [x3,y3]] in image
    coordinates. PDF has its origin at the bottom, the image at the top, so
    every Y value is flipped against page_height.

    Returns None when bbox does not consist of exactly four points that each
    provide an x and a numeric y, so the caller can skip the entry.
    """
    if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
        return None
    try:
        return [(point[0], page_height - point[1]) for point in bbox]
    except (TypeError, IndexError, KeyError):
        return None


def _draw_quadrilateral(c, points):
    """Stroke (without filling) the closed polygon through the given points."""
    path = c.beginPath()
    path.moveTo(*points[0])
    for x, y in points[1:]:
        path.lineTo(x, y)
    path.close()
    c.drawPath(path, stroke=1, fill=0)


def _draw_details(c, details, page_height):
    """Draw the bbox outline and text of every usable detail.

    Entries with empty text or a malformed bbox are skipped.

    Returns the number of entries that were drawn.
    """
    # These settings are identical for every entry, so set them once.
    c.setStrokeColor(red)
    c.setLineWidth(0.5)
    c.setFillColor(blue)

    drawn = 0
    for detail in details:
        # `or ''` also covers an explicit null text value in the JSON.
        text = (detail.get('text') or '').strip()
        if not text:
            continue

        points_pdf = _to_pdf_points(detail.get('bbox', []), page_height)
        if points_pdf is None:
            continue

        # Draw the actual quadrilateral
        _draw_quadrilateral(c, points_pdf)

        # For text, use bottom-left corner (point 3)
        x_text, y_text = points_pdf[3]

        # Font size is derived from the vertical extent of the bbox.
        ys = [y for _, y in points_pdf]
        height = max(ys) - min(ys)
        font_size = max(1, height * 0.7)

        c.setFont(font_name, font_size)
        c.drawString(x_text, y_text, text)
        drawn += 1

    return drawn


def main():
    """Build the overlay PDF for the first page of the configured document."""
    # Load JSON. The encoding is explicit because the OCR output contains
    # Chinese text and the platform default encoding is not always UTF-8.
    with open(json_path, encoding='utf-8') as f:
        data = json.load(f)

    pages = data.get('pages') or []
    if not pages:
        raise SystemExit(f"No pages found in {json_path}")
    details = pages[0].get('details') or []

    # Convert PDF to image at 300 DPI (same as OCR). Only the first page is
    # used, so only the first page is rendered.
    print("Converting PDF to image at 300 DPI...")
    images = convert_from_path(pdf_path, dpi=300, first_page=1, last_page=1)
    if not images:
        raise SystemExit(f"No pages could be rendered from {pdf_path}")
    img = images[0]
    img_width, img_height = img.size
    print(f"Image size: {img_width} x {img_height}")

    # Reserve a temp file name, closing the handle before the image is
    # written to it.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        tmp_path = tmp.name

    try:
        img.save(tmp_path, 'PNG')

        # Create PDF with one page the same size as the rendered image
        c = canvas.Canvas(output_path)
        c.setPageSize((img_width, img_height))

        # Draw image
        c.drawImage(ImageReader(tmp_path), 0, 0,
                    width=img_width, height=img_height)

        text_count = _draw_details(c, details, img_height)
        print(f"Drew {text_count} quadrilaterals")

        c.showPage()
        c.save()
    finally:
        # Remove the temp image even when drawing or saving failed.
        os.unlink(tmp_path)

    print(f"\nDone! Check {output_path}")
    print("Red quadrilaterals should now EXACTLY match the text areas!")


if __name__ == "__main__":
    main()