Remove ClassGenSystem and obsolete tools from tracking, update course content files with latest revisions. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
83 lines
2.8 KiB
Python
83 lines
2.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Diagnostic script to understand the alignment issue.
|
|
Prints precise coordinates to find the correction pattern.
|
|
"""
|
|
|
|
import json
|
|
from pdf2image import convert_from_path
|
|
|
|
# Input files for the page being diagnosed (paths are relative to the
# directory the script is run from).
json_path = "Raw/DevelopChinese/OCR/听力Intro.json"
pdf_path = "Raw/DevelopChinese/PDF/听力Intro.pdf"


def load_page(path, page_index=0):
    """Load the OCR JSON at *path* and return one entry of its 'pages' list.

    The file is opened explicitly as UTF-8: the OCR text is Chinese, and
    relying on the platform's locale encoding breaks on non-UTF-8 systems.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return data['pages'][page_index]


def text_details(page):
    """Return the entries of page['details'] that have both a bbox and text."""
    return [d for d in page['details'] if d.get('bbox') and d.get('text')]


def sort_by_top_edge(details):
    """Return *details* ordered by the smallest Y value found in each bbox."""
    return sorted(details, key=lambda d: min(p[1] for p in d['bbox']))


def bbox_y_bounds(bbox):
    """Return (min_y, max_y) over the points of *bbox*.

    Each point is indexed as p[0] = x, p[1] = y.
    """
    ys = [p[1] for p in bbox]
    return min(ys), max(ys)


def ocr_dimensions(details, margin=1.02):
    """Estimate the OCR coordinate space as (width, height).

    The extent is the largest x / y seen in any bbox, scaled by *margin* and
    truncated to int.
    NOTE(review): the 2% margin is presumably padding for the page border that
    no text block reaches - confirm against the OCR producer.
    """
    max_x = max(p[0] for d in details for p in d['bbox'])
    max_y = max(p[1] for d in details for p in d['bbox'])
    return int(max_x * margin), int(max_y * margin)


def sample_blocks(details_sorted):
    """Pick the first, middle and last block, labelled TOP / MIDDLE / BOTTOM.

    With fewer than three blocks the same block is returned more than once.
    """
    positions = ("TOP", "MIDDLE", "BOTTOM")
    indices = (0, len(details_sorted) // 2, -1)
    return [(pos, details_sorted[idx]) for pos, idx in zip(positions, indices)]


def y_flip_comparison(max_y, ocr_height):
    """Return (pdf_height, y_current, y_should_be) for one block.

    y_current is the formula currently in use:
        y_pdf = pdf_height - ((max_y / ocr_height) * pdf_height)
    y_should_be is the plain 1:1 flip into a bottom-left-origin space:
        y_pdf = pdf_height - max_y

    NOTE(review): pdf_height is taken from ocr_height (OCR dims used as PDF
    dims), which makes the two expressions algebraically identical, so their
    difference is ~0 for every block. Feeding in the real PDF page height
    would be needed for this comparison to reveal a scaling error.
    """
    pdf_height = float(ocr_height)  # Using OCR dims as PDF dims
    y_current = pdf_height - ((max_y / ocr_height) * pdf_height)
    y_should_be = pdf_height - max_y
    return pdf_height, y_current, y_should_be


def main():
    """Print image/OCR dimensions and the Y-flip numbers for three sample blocks."""
    details = text_details(load_page(json_path))
    if not details:
        # Without this guard the max()/indexing below fail with an opaque error.
        raise SystemExit(f"No OCR blocks with both 'bbox' and 'text' on the first page of {json_path}")

    details_sorted = sort_by_top_edge(details)

    # Render only the first PDF page, to read its pixel size at 300 dpi.
    imgs = convert_from_path(pdf_path, dpi=300, first_page=1, last_page=1)
    img_width, img_height = imgs[0].size

    ocr_width, ocr_height = ocr_dimensions(details)
    if ocr_height == 0:
        # The current formula divides by ocr_height.
        raise SystemExit("OCR height evaluates to 0; cannot compute Y ratios")

    rule = '=' * 80

    print(f"Image dimensions: {img_width} x {img_height}")
    print(f"OCR dimensions: {ocr_width} x {ocr_height}")
    print(f"\n{rule}")
    print("BBOX COORDINATES ANALYSIS")
    print(f"{rule}\n")

    # Analyze top, middle, bottom blocks
    for pos, detail in sample_blocks(details_sorted):
        text = detail['text'][:30]
        min_y, max_y = bbox_y_bounds(detail['bbox'])

        print(f"{pos} BLOCK: \"{text}\"")
        print(f" OCR Y coordinates: min_y={min_y}, max_y={max_y}")

        pdf_height, y_current, y_should_be = y_flip_comparison(max_y, ocr_height)

        print(f" Current calculation: y_pdf = {pdf_height} - (({max_y}/{ocr_height}) * {pdf_height})")
        print(f" Current result: y_pdf = {y_current:.1f}")

        # In OCR space the text spans Y = min_y..max_y; in PDF space (origin
        # bottom-left) it should span (pdf_height - max_y)..(pdf_height - min_y).
        print(f" Expected (1:1): y_pdf = {pdf_height} - {max_y} = {y_should_be:.1f}")
        print(f" Difference: {y_current - y_should_be:.1f} points")
        print()

    print(f"{rule}")
    print("DIAGNOSTIC QUESTIONS:")
    print(f"{rule}\n")
    print("If all differences are ~0, the formula is correct but something else is wrong.")
    print("If TOP has positive diff and BOTTOM has negative diff, there's a scaling issue.")
    print("If all have the same diff, there's an offset issue.")
    print("\nLook at the PDF in debug mode and tell me:")
    print("1. For TOP block: red box is HOW MANY pixels too high/low?")
    print("2. For MIDDLE block: red box is HOW MANY pixels too high/low?")
    print("3. For BOTTOM block: red box is HOW MANY pixels too high/low?")


if __name__ == "__main__":
    main()