#!/usr/bin/env python3
"""
Markdown to PDF converter built on fpdf2.

The input file is read as UTF-8. Text is rendered with the PDF built-in
(core) fonts, which can only encode Latin-1, so common typographic
characters (bullets, curly quotes, dashes, ...) are mapped to Latin-1
equivalents and any other unsupported character is replaced with '?'
instead of aborting the conversion.

Usage:
    script.py [INPUT.md [OUTPUT.pdf]]
"""

import os
import re
import sys

import markdown
from fpdf import FPDF


# 'Arial' is only an alias that fpdf2 substitutes with the Helvetica core
# font (emitting a warning), so the core font is named directly.
BODY_FONT = 'Helvetica'
CODE_FONT = 'Courier'

# Heading level -> (font size, RGB colour, line height,
#                   space before, space after, alignment)
HEADING_STYLES = {
    1: (20, (44, 62, 80), 10, 0, 5, 'C'),   # #2C3E50
    2: (16, (44, 62, 80), 8, 3, 2, 'J'),
    3: (14, (39, 174, 96), 7, 2, 1, 'J'),   # #27AE60
    4: (12, (44, 62, 80), 6, 1, 0, 'J'),
}

HEADING_RE = re.compile(r'^(#{1,6})\s+(.+)$')
BOLD_RE = re.compile(r'\*\*(.*?)\*\*')
ITALIC_RE = re.compile(r'\*(.*?)\*')
INLINE_CODE_RE = re.compile(r'`(.*?)`')
LINK_RE = re.compile(r'\[(.*?)\]\(.*?\)')

# Frequent non-Latin-1 characters and their closest Latin-1 stand-ins.
LATIN1_FALLBACKS = str.maketrans({
    '\u2022': '\u00b7',  # bullet -> middle dot
    '\u2018': "'",
    '\u2019': "'",
    '\u201c': '"',
    '\u201d': '"',
    '\u2013': '-',
    '\u2014': '-',
    '\u2026': '...',
    '\u2192': '->',
    '\u2190': '<-',
    '\u20ac': 'EUR',
})

# Paths used when the script is started without arguments.
DEFAULT_INPUT = r'C:\Users\alexi\Documents\projects\freelance-dashboard\TECHNICAL_REFERENCE_EN.md'
DEFAULT_OUTPUT = r'C:\Users\alexi\Documents\projects\freelance-dashboard\TECHNICAL_REFERENCE_EN.pdf'


def to_latin1(text):
    """Return *text* restricted to characters the core fonts can encode.

    Known typographic characters are translated first; anything still
    outside Latin-1 becomes '?'.
    """
    translated = text.translate(LATIN1_FALLBACKS)
    return translated.encode('latin-1', 'replace').decode('latin-1')


class PDF(FPDF):
    """FPDF document with automatic page breaks and a page-number footer."""

    def __init__(self):
        super().__init__()
        self.set_auto_page_break(auto=True, margin=15)

    def header(self):
        """Draw nothing: pages have no header."""

    def footer(self):
        """Print the centred page number 15 units above the page bottom."""
        self.set_y(-15)
        self.set_font(BODY_FONT, 'I', 8)
        self.set_text_color(128, 128, 128)
        # Keyword arguments avoid the deprecated positional ``ln`` parameter.
        self.cell(0, 10, f'Page {self.page_no()}', border=0, align='C')


class MarkdownToPDF:
    """Render a Markdown file to PDF, one source line at a time.

    Supported constructs: headings, list items, blockquotes, horizontal
    rules, fenced code blocks and plain paragraphs. Table rows are skipped.

    Args:
        input_file: Path of the UTF-8 Markdown file to read.
        output_file: Path of the PDF file to write.
    """

    def __init__(self, input_file, output_file):
        self.input_file = input_file
        self.output_file = output_file
        self.pdf = PDF()
        self.pdf.add_page()

    def convert(self):
        """Read the Markdown file, render every line and save the PDF.

        Raises:
            OSError: If the input file cannot be read.
        """
        with open(self.input_file, 'r', encoding='utf-8') as f:
            md_content = f.read()

        in_code_block = False
        for raw_line in md_content.split('\n'):
            stripped = raw_line.strip()

            # A ``` fence toggles verbatim mode; the fence itself is not drawn.
            if stripped.startswith('```'):
                in_code_block = not in_code_block
                continue

            if in_code_block:
                # Code must not be parsed as Markdown ('# x' is a comment,
                # not a heading) and keeps its indentation.
                self._render_code_line(raw_line)
            else:
                self._render_line(stripped)

        self.pdf.output(self.output_file)
        print(f"PDF created successfully: {self.output_file}")

    def _render_line(self, line):
        """Render one stripped, non-code Markdown line."""
        if not line:
            self.pdf.ln(3)
            return

        heading = HEADING_RE.match(line)
        if heading:
            # H5/H6 have no dedicated style and are drawn like H4.
            level = min(len(heading.group(1)), 4)
            self._render_heading(level, heading.group(2).strip())
            return

        # Horizontal rule: only vertical space is added.
        if line.startswith('---'):
            self.pdf.ln(2)
            return

        if line.startswith(('- ', '* ')):
            text = self._clean_markdown(line[2:].strip())
            self.pdf.set_font(BODY_FONT, '', 10)
            self.pdf.set_text_color(51, 51, 51)
            self._write(f'• {text}', 5, indent=5)
            return

        if line.startswith('>'):
            text = self._clean_markdown(line[1:].strip())
            self.pdf.set_font(BODY_FONT, 'I', 10)
            self.pdf.set_text_color(85, 85, 85)
            self.pdf.set_fill_color(245, 245, 245)
            self._write(text, 5, fill=True)
            return

        # Tables are not rendered: any line containing a pipe is treated as
        # a table row, header or separator and skipped.
        if '|' in line:
            return

        text = self._clean_markdown(line)
        if text:
            self.pdf.set_font(BODY_FONT, '', 10)
            self.pdf.set_text_color(51, 51, 51)
            self._write(text, 5)

    def _render_heading(self, level, title):
        """Render a heading using the style registered for *level* (1-4)."""
        size, colour, height, before, after, align = HEADING_STYLES[level]
        if before:
            self.pdf.ln(before)
        self.pdf.set_font(BODY_FONT, 'B', size)
        self.pdf.set_text_color(*colour)
        self._write(self._clean_markdown(title), height, align=align)
        if after:
            self.pdf.ln(after)

    def _render_code_line(self, raw_line):
        """Render one line of a fenced code block verbatim in monospace."""
        text = raw_line.rstrip().expandtabs(4)
        if not text:
            self.pdf.ln(4)
            return
        self.pdf.set_font(CODE_FONT, '', 9)
        self.pdf.set_text_color(51, 51, 51)
        self.pdf.set_fill_color(245, 245, 245)
        # Left-aligned so justification never stretches spaces inside code.
        self._write(text, 4, align='L', fill=True)

    def _write(self, text, height, indent=0, align='J', fill=False):
        """Write *text* as a full-width wrapped paragraph.

        The cursor is always moved back to the left margin (plus *indent*)
        first: recent fpdf2 releases leave it at the right edge after
        ``multi_cell``, which would leave no room for the next paragraph.
        """
        self.pdf.set_x(self.pdf.l_margin + indent)
        self.pdf.multi_cell(0, height, to_latin1(text), align=align, fill=fill)

    def _clean_markdown(self, text):
        """Remove inline Markdown markup (bold, italic, code, links)."""
        text = BOLD_RE.sub(r'\1', text)          # **text**
        text = ITALIC_RE.sub(r'\1', text)        # *text*
        text = INLINE_CODE_RE.sub(r'\1', text)   # `text`
        text = LINK_RE.sub(r'\1', text)          # [text](url)
        return text


def main(argv=None):
    """Convert the file named on the command line.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. With no arguments the built-in default paths
            are used. With only an input path the output is written next to
            it with a ``.pdf`` extension.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if not args:
        input_file, output_file = DEFAULT_INPUT, DEFAULT_OUTPUT
    elif len(args) == 1:
        input_file = args[0]
        output_file = os.path.splitext(input_file)[0] + '.pdf'
    else:
        input_file, output_file = args[0], args[1]

    converter = MarkdownToPDF(input_file, output_file)
    converter.convert()


if __name__ == '__main__':
    main()