Remove ClassGenSystem and obsolete tools from tracking, update course content files with latest revisions. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
124 lines
3.3 KiB
Python
124 lines
3.3 KiB
Python
#!/usr/bin/env python3
"""
ULTRA-SIMPLE version to test basic alignment.
No fancy corrections, just raw bbox to PDF mapping.

Renders the first page of the source PDF at 300 DPI, uses that image as the
background of a new single-page PDF whose page size equals the image size in
pixels, and overlays every OCR text block from the JSON file as a red
rectangle with the recognised text drawn in blue at the box's lower-left
corner.
"""

import json
import os
import sys
import tempfile
from pathlib import Path

from PIL import Image
from pdf2image import convert_from_path
from reportlab.lib.colors import red, blue
from reportlab.lib.utils import ImageReader
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas


def _ocr_extent(details):
    """Return (max_x, max_y) over all four-point bboxes, or (0, 0) if none."""
    max_x = 0
    max_y = 0
    for detail in details:
        bbox = detail.get('bbox', [])
        if bbox and len(bbox) == 4:
            for point in bbox:
                max_x = max(max_x, point[0])
                max_y = max(max_y, point[1])
    return max_x, max_y


def _bbox_to_pdf_rect(bbox, ocr_width, ocr_height, page_width, page_height):
    """Map a four-point OCR bbox to an (x, y, width, height) PDF rectangle.

    Coordinates are scaled linearly from the OCR extent to the page size.
    The y axis is flipped so the returned y is the bottom edge of the box,
    because the PDF origin is at the bottom-left of the page.
    """
    xs = [p[0] for p in bbox]
    ys = [p[1] for p in bbox]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    x_pdf = (min_x / ocr_width) * page_width
    width_pdf = ((max_x - min_x) / ocr_width) * page_width

    # Y is inverted (PDF origin at bottom)
    y_pdf = page_height - ((max_y / ocr_height) * page_height)
    height_pdf = ((max_y - min_y) / ocr_height) * page_height
    return x_pdf, y_pdf, width_pdf, height_pdf


# Register Chinese font; fall back to a built-in font when it is unavailable.
try:
    pdfmetrics.registerFont(TTFont('ChineseFont', 'C:/Windows/Fonts/msyh.ttc'))
    font_name = 'ChineseFont'
except Exception:
    # Best-effort: any failure to load the font only degrades text rendering.
    font_name = 'Helvetica'
    print("Warning: No Chinese font, using Helvetica")

# Paths
json_path = "Raw/DevelopChinese/OCR/听力Intro.json"
pdf_path = "Raw/DevelopChinese/PDF/听力Intro.pdf"
output_path = "test_SIMPLE.pdf"

# Load JSON. The encoding is explicit so the Chinese text decodes the same
# way regardless of the platform's locale default.
with open(json_path, encoding="utf-8") as f:
    data = json.load(f)

# Convert PDF to image at fixed 300 DPI
print("Converting PDF to image at 300 DPI...")
images = convert_from_path(pdf_path, dpi=300)
img = images[0]  # First page only
img_width, img_height = img.size
print(f"Image size: {img_width} x {img_height}")

# Create PDF with same dimensions as image (1:1 ratio)
c = canvas.Canvas(output_path)
c.setPageSize((img_width, img_height))

# Reserve a temp file name, then close the handle before writing to it by name.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
    tmp_path = tmp.name

try:
    # Save and draw image
    img.save(tmp_path, 'PNG')
    img_reader = ImageReader(tmp_path)
    c.drawImage(img_reader, 0, 0, width=img_width, height=img_height)

    # Get page data
    page_data = data['pages'][0]
    details = page_data.get('details', [])

    # Find max coordinates from ALL bboxes (this is our "OCR space")
    max_x_ocr, max_y_ocr = _ocr_extent(details)

    print(f"OCR space: 0-{max_x_ocr} x 0-{max_y_ocr}")
    print(f"PDF/Image space: 0-{img_width} x 0-{img_height}")

    text_count = 0
    if max_x_ocr > 0 and max_y_ocr > 0:
        print(f"Scale factors: X={img_width/max_x_ocr:.4f}, Y={img_height/max_y_ocr:.4f}")

        # Box and text colours are the same for every block; set them once.
        c.setStrokeColor(red)
        c.setLineWidth(0.5)
        c.setFillColor(blue)

        # Draw bboxes
        for detail in details:
            text = detail.get('text', '').strip()
            if not text:
                continue

            bbox = detail.get('bbox', [])
            if not bbox or len(bbox) != 4:
                continue

            # SIMPLE mapping: scale from OCR space to PDF space
            x_pdf, y_pdf, width_pdf, height_pdf = _bbox_to_pdf_rect(
                bbox, max_x_ocr, max_y_ocr, img_width, img_height
            )

            # Draw red box
            c.rect(x_pdf, y_pdf, width_pdf, height_pdf, stroke=1, fill=0)

            # Draw blue text, sized relative to the box height (minimum 1pt)
            font_size = max(1, height_pdf * 0.7)
            c.setFont(font_name, font_size)
            c.drawString(x_pdf, y_pdf, text)

            text_count += 1
    else:
        # Without a positive OCR extent the scale factors are undefined
        # (division by zero), so only the background image is written.
        print("Warning: no usable bboxes found, skipping overlay")

    print(f"Added {text_count} text blocks")

    c.showPage()
    c.save()
finally:
    # Cleanup: remove the temp image even if rendering failed part-way.
    if os.path.exists(tmp_path):
        os.unlink(tmp_path)

print(f"\nDone! Check {output_path}")
print("If boxes are STILL misaligned, the problem is in the OCR data itself.")