import OpenAI from 'openai';
import fs from 'fs';
import path from 'path';

// Shared OpenAI client; created on first use by getOpenAI().
let openai = null;

// Max characters per chunk. At roughly 4 characters per token this is about
// 5000 tokens of input. NOTE(review): scripts that tokenize less efficiently
// (e.g. CJK) may produce far more tokens per character - confirm the output
// still fits within the `max_tokens` used by translateChunk().
const MAX_CHUNK_CHARS = 20000;

// Language code -> English language name used when building prompts.
const LANGUAGES = {
  en: 'English',
  fr: 'French',
  es: 'Spanish',
  de: 'German',
  it: 'Italian',
  pt: 'Portuguese',
  zh: 'Chinese',
  ja: 'Japanese',
  ko: 'Korean',
  ru: 'Russian',
  ar: 'Arabic',
  hi: 'Hindi',
  nl: 'Dutch',
  pl: 'Polish',
  tr: 'Turkish',
  vi: 'Vietnamese',
  th: 'Thai',
  sv: 'Swedish',
  da: 'Danish',
  fi: 'Finnish',
  no: 'Norwegian',
  cs: 'Czech',
  el: 'Greek',
  he: 'Hebrew',
  id: 'Indonesian',
  ms: 'Malay',
  ro: 'Romanian',
  uk: 'Ukrainian',
};

// Single characters treated as a sentence ending: ASCII . ! ?, ideographic
// full stop (U+3002), full-width ! and ? (U+FF01, U+FF1F), Devanagari danda
// (U+0964), Arabic question mark (U+061F) and newline.
// Deliberately NOT a /g regex: it is only used with .test() on one character,
// so there is no `lastIndex` state to reset between calls.
const SENTENCE_ENDINGS = /[.!?\u3002\uFF01\uFF1F\u0964\u061F\n]/;

// Matches a single whitespace character.
const WHITESPACE = /\s/;

/**
 * Get OpenAI client (lazy initialization)
 * @returns {OpenAI} The shared client instance
 * @throws {Error} If OPENAI_API_KEY is not set in the environment
 */
function getOpenAI() {
  if (!openai) {
    if (!process.env.OPENAI_API_KEY) {
      throw new Error('OPENAI_API_KEY environment variable is not set');
    }
    openai = new OpenAI({
      apiKey: process.env.OPENAI_API_KEY,
    });
  }
  return openai;
}

/**
 * Tell whether cutting `text` at `index` would separate a UTF-16 high
 * surrogate from its low surrogate.
 * @param {string} text - Text being cut
 * @param {number} index - Proposed cut position (0 < index < text.length)
 * @returns {boolean} True when the cut falls inside a surrogate pair
 */
function splitsSurrogatePair(text, index) {
  const before = text.charCodeAt(index - 1);
  const after = text.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Choose where the chunk starting at `start` should end.
 * The caller guarantees that `start + maxChars < text.length`.
 * @param {string} text - Full text being split
 * @param {number} start - Index of the first character of the chunk
 * @param {number} maxChars - Target maximum chunk length (positive integer)
 * @returns {number} Exclusive end index, always greater than `start`
 */
function findChunkEnd(text, start, maxChars) {
  const hardEnd = start + maxChars;
  const minOffset = maxChars * 0.5;

  // 1. Last sentence ending in the second half of the window, so a chunk is
  //    never shorter than half the budget because of an early full stop.
  for (let offset = maxChars; offset > minOffset; offset--) {
    if (SENTENCE_ENDINGS.test(text[start + offset - 1])) {
      return start + offset; // include the punctuation
    }
  }

  // 2. First sentence ending within the next 20% beyond the window.
  const forwardLimit = Math.min(text.length, Math.floor(hardEnd + maxChars * 0.2));
  for (let pos = hardEnd; pos < forwardLimit; pos++) {
    if (SENTENCE_ENDINGS.test(text[pos])) {
      return pos + 1;
    }
  }

  // 3. No sentence boundary: prefer whitespace over cutting inside a word.
  for (let offset = maxChars; offset > minOffset; offset--) {
    if (WHITESPACE.test(text[start + offset - 1])) {
      return start + offset;
    }
  }

  // 4. Hard cut, moved off a surrogate pair so no character is torn in half.
  if (splitsSurrogatePair(text, hardEnd)) {
    return hardEnd - 1 > start ? hardEnd - 1 : hardEnd + 1;
  }
  return hardEnd;
}

/**
 * Split text into chunks at sentence boundaries
 *
 * Text that already fits in `maxChars` is returned as a single, untrimmed
 * chunk. Longer text is cut into trimmed chunks; each chunk also records the
 * whitespace that followed it in the source so the caller can re-join
 * translated chunks without inventing paragraph breaks.
 *
 * @param {string} text - Text to split
 * @param {number} maxChars - Maximum characters per chunk (positive integer;
 *   a chunk may exceed it by up to 20% to reach a sentence ending)
 * @returns {{text: string, separator: string}[]} Non-empty chunks in order
 * @throws {RangeError} If maxChars is not a positive integer
 */
function splitIntoChunks(text, maxChars = MAX_CHUNK_CHARS) {
  // A non-positive size would never advance through the text.
  if (!Number.isInteger(maxChars) || maxChars < 1) {
    throw new RangeError(`maxChars must be a positive integer, got ${maxChars}`);
  }

  if (text.length <= maxChars) {
    return [{ text, separator: '' }];
  }

  const chunks = [];
  let start = 0;

  while (start < text.length) {
    const isLast = start + maxChars >= text.length;
    const end = isLast ? text.length : findChunkEnd(text, start, maxChars);

    // Skip any leading whitespace for the next chunk
    let next = end;
    while (next < text.length && WHITESPACE.test(text[next])) {
      next++;
    }

    const raw = text.slice(start, end);
    const body = raw.trim();
    if (body.length > 0) {
      // Whitespace trimmed off the end of this chunk plus the whitespace
      // skipped before the next one is exactly what separated them.
      const trailing = raw.slice(raw.trimEnd().length);
      chunks.push({ text: body, separator: trailing + text.slice(end, next) });
    }

    start = next;
  }

  return chunks;
}

/**
 * Get available languages
 * @returns {Object<string, string>} Map of language code to language name.
 *   A copy is returned so callers cannot modify the table used internally.
 */
export function getLanguages() {
  return { ...LANGUAGES };
}

/**
 * Translate a single chunk of text
 * @param {string} text - Text to translate
 * @param {string} targetLanguage - Target language as written in the prompt
 * @param {string|null} sourceLanguage - Source language as written in the
 *   prompt, or a falsy value to omit it from the prompt
 * @returns {Promise<string>} The content of the first completion choice
 * @throws {Error} If the response carries no string content
 */
async function translateChunk(text, targetLanguage, sourceLanguage) {
  const prompt = sourceLanguage
    ? `Translate the following text from ${sourceLanguage} to ${targetLanguage}. Only output the translation, nothing else:\n\n${text}`
    : `Translate the following text to ${targetLanguage}. Only output the translation, nothing else:\n\n${text}`;

  const response = await getOpenAI().chat.completions.create({
    model: 'gpt-4o-mini',
    max_tokens: 16384,
    messages: [
      {
        role: 'user',
        content: prompt,
      },
    ],
  });

  const choice = response.choices?.[0];
  const content = choice?.message?.content;
  if (typeof content !== 'string') {
    throw new Error('OpenAI returned no translation content');
  }

  // The API reports 'length' when generation stopped at the token limit;
  // surface that instead of silently returning a cut-off translation.
  if (choice.finish_reason === 'length') {
    console.warn('Warning: translation was truncated because the output token limit was reached');
  }

  return content;
}

/**
 * Resolve a language code to the name used in prompts.
 * @param {string} code - Language code or free-form language name
 * @returns {string} The name from LANGUAGES, or `code` itself when unknown
 */
function resolveLanguageName(code) {
  // Own-property check so inherited keys such as 'constructor' are not
  // mistaken for language codes.
  return Object.hasOwn(LANGUAGES, code) ? LANGUAGES[code] : code;
}

/**
 * Translate text using GPT-4o-mini with chunking for long texts
 * @param {string} text - Text to translate
 * @param {string} targetLang - Target language code (e.g., 'en', 'fr');
 *   values not in the language table are passed to the prompt unchanged
 * @param {string} sourceLang - Source language code (optional, auto-detect if null)
 * @returns {Promise<{success: boolean, originalText: string, translatedText: string,
 *   targetLanguage: string, sourceLanguage: string, chunks: number}>}
 * @throws {Error} If text or targetLang is missing, or (with the original
 *   error attached as `cause`) if splitting or any chunk translation fails
 */
export async function translateText(text, targetLang, sourceLang = null) {
  if (typeof text !== 'string' || !text.trim()) {
    throw new Error('No text provided for translation');
  }
  if (!targetLang) {
    throw new Error('No target language provided');
  }

  const targetLanguage = resolveLanguageName(targetLang);
  const sourceLanguage = sourceLang ? resolveLanguageName(sourceLang) : null;

  try {
    const chunks = splitIntoChunks(text);
    const isChunked = chunks.length > 1;

    if (isChunked) {
      console.log(`Splitting text into ${chunks.length} chunks for translation...`);
    }

    // Chunks are translated one at a time, in order.
    let translatedText = '';
    for (const [index, chunk] of chunks.entries()) {
      if (isChunked) {
        console.log(`  Translating chunk ${index + 1}/${chunks.length} (${chunk.text.length} chars)...`);
      }
      translatedText += await translateChunk(chunk.text, targetLanguage, sourceLanguage);

      // Re-insert the whitespace that separated the chunks in the source so
      // no paragraph break appears where the original had none.
      if (index < chunks.length - 1) {
        translatedText += chunk.separator;
      }
    }

    return {
      success: true,
      originalText: text,
      translatedText,
      targetLanguage,
      sourceLanguage: sourceLanguage || 'auto-detected',
      chunks: chunks.length,
    };
  } catch (error) {
    throw new Error(`Translation failed: ${error.message}`, { cause: error });
  }
}

/**
 * Translate a text file
 *
 * Reads the file as UTF-8, translates it and writes the result to
 * `<name>_<targetLang>.txt` in `outputDir` (created if missing) or, when no
 * output directory is given, next to the input file.
 *
 * @param {string} filePath - Path to text file
 * @param {string} targetLang - Target language code
 * @param {string} sourceLang - Source language code (optional)
 * @param {string} outputDir - Output directory (optional)
 * @returns {Promise<Object>} The translateText() result plus `originalPath`
 *   and `translationPath`
 * @throws {Error} `File not found: <path>` when the file does not exist;
 *   other read, translation or write errors propagate
 */
export async function translateFile(filePath, targetLang, sourceLang = null, outputDir = null) {
  if (typeof filePath !== 'string' || filePath === '') {
    throw new Error(`File not found: ${filePath}`);
  }

  // Read directly and map "missing" errors, rather than checking existence
  // first and racing against the file being removed in between.
  let text;
  try {
    text = await fs.promises.readFile(filePath, 'utf-8');
  } catch (error) {
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') {
      throw new Error(`File not found: ${filePath}`, { cause: error });
    }
    throw error;
  }

  // Create the output directory before translating so an unusable location
  // fails before any API call is made.
  if (outputDir) {
    await fs.promises.mkdir(outputDir, { recursive: true });
  }

  const result = await translateText(text, targetLang, sourceLang);

  // Save translation
  const baseName = path.basename(filePath, path.extname(filePath));
  const outputPath = path.join(
    outputDir || path.dirname(filePath),
    `${baseName}_${targetLang}.txt`
  );

  await fs.promises.writeFile(outputPath, result.translatedText, 'utf-8');

  return {
    ...result,
    originalPath: filePath,
    translationPath: outputPath,
  };
}

/**
 * Translate multiple files
 *
 * Files are processed one after another. A failure on one file is logged and
 * recorded in the results; it does not stop the remaining files.
 *
 * @param {string[]} filePaths - Paths of the files to translate
 * @param {string} targetLang - Target language code
 * @param {string} sourceLang - Source language code (optional)
 * @param {string} outputDir - Output directory (optional)
 * @param {Function} onProgress - Optional callback invoked before each file
 *   with `{ current, total, filePath }`
 * @returns {Promise<{success: boolean, results: Object[], totalFiles: number,
 *   successCount: number, failCount: number}>} `success` is always true;
 *   inspect `failCount` or the per-file `success` flags for failures
 */
export async function translateMultiple(filePaths, targetLang, sourceLang = null, outputDir = null, onProgress = null) {
  const results = [];
  const total = filePaths.length;

  for (const [index, filePath] of filePaths.entries()) {
    const current = index + 1;

    if (typeof onProgress === 'function') {
      onProgress({ current, total, filePath });
    }

    console.log(`[${current}/${total}] Translating: ${path.basename(filePath)}`);

    try {
      const result = await translateFile(filePath, targetLang, sourceLang, outputDir);
      results.push(result);
    } catch (error) {
      console.error(`Failed to translate ${filePath}: ${error.message}`);
      results.push({
        success: false,
        originalPath: filePath,
        error: error.message,
      });
    }
  }

  const successCount = results.filter((r) => r.success).length;

  return {
    success: true,
    results,
    totalFiles: total,
    successCount,
    failCount: results.length - successCount,
  };
}