// === UNIVERSAL CONTENT PARSERS ===

// === FREE-TEXT PARSER ===

/**
 * Heuristic parser for free-form text. It classifies the text as a
 * vocabulary list, a dialogue, a sequence of steps, or plain sentences
 * and parses it accordingly.
 */
class TextParser {
    /**
     * Analyse a free-text input.
     *
     * Only the first matching content type is parsed; the checks run in the
     * order vocabulary list -> dialogue -> sequence -> sentences.
     *
     * NOTE(review): the vocabulary heuristic also matches "Speaker: text"
     * lines, so most colon-style dialogues are classified as vocabulary
     * lists. The classification order is left as it was; confirm with
     * callers before changing it.
     *
     * @param {string} text - Raw text to analyse (null/undefined is treated as '').
     * @param {object} [options] - Currently unused.
     * @returns {Promise<object>} Object with `rawText`, `vocabulary`, `sentences`,
     *   `dialogue`, `sequence` and `metadata`.
     */
    async parse(text, options = {}) {
        console.log('📝 TextParser - Analyse du texte libre');

        // Tolerate non-string input instead of throwing on the first .split().
        const source = typeof text === 'string' ? text : String(text ?? '');

        const result = {
            rawText: source,
            vocabulary: [],
            sentences: [],
            dialogue: null,
            sequence: null,
            metadata: {
                wordCount: this.countWords(source),
                language: this.detectLanguage(source),
                structure: this.analyzeStructure(source),
            },
        };

        // Detect the content type.
        if (this.isVocabularyList(source)) {
            result.vocabulary = this.parseVocabularyList(source);
        } else if (this.isDialogue(source)) {
            result.dialogue = this.parseDialogue(source);
        } else if (this.isSequence(source)) {
            result.sequence = this.parseSequence(source);
        } else {
            result.sentences = this.parseSentences(source);
        }

        return result;
    }

    /**
     * Count whitespace-separated words, ignoring empty tokens.
     *
     * @param {string} text
     * @returns {number} 0 for empty or whitespace-only text.
     */
    countWords(text) {
        const trimmed = text.trim();
        return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
    }

    /**
     * Check for "word = translation", "word: translation" or
     * "word - translation" anywhere in the text.
     *
     * @param {string} text
     * @returns {boolean}
     */
    isVocabularyList(text) {
        return /\w+\s*[=:-]\s*\w+/.test(text);
    }

    /**
     * Parse one vocabulary entry per non-blank line:
     * "english <sep> french (category)", where the category is optional.
     *
     * @param {string} text
     * @returns {Array<{english: string, french: string, category: string, index: number}>}
     */
    parseVocabularyList(text) {
        const vocabulary = [];
        // Split on \r?\n: "." never matches "\r", so CRLF lines would not match.
        const lines = text.split(/\r?\n/).filter((line) => line.trim());

        lines.forEach((line, index) => {
            const matches = line.trim().match(/(.+?)\s*[=:-]\s*(.+?)(?:\s*\((.+?)\))?$/);
            if (matches) {
                const [, english, french, category] = matches;
                vocabulary.push({
                    english: english.trim(),
                    french: french.trim(),
                    category: category?.trim() || 'general',
                    index: index,
                });
            }
        });

        return vocabulary;
    }

    /**
     * Check for speaker patterns such as "A:", "Person1:" or "- Alice:".
     *
     * @param {string} text
     * @returns {boolean}
     */
    isDialogue(text) {
        return /^[A-Z][^:]*:|^-\s*[A-Z]/m.test(text);
    }

    /**
     * Parse "Speaker: utterance" lines (with an optional leading dash).
     *
     * @param {string} text
     * @returns {{scenario: string, conversation: Array<object>, speakers: string[]}}
     */
    parseDialogue(text) {
        const conversation = [];
        const lines = text.split(/\r?\n/).filter((line) => line.trim());

        lines.forEach((line) => {
            const speakerMatch = line.trim().match(/^(?:-\s*)?([^:]+):\s*(.+)$/);
            if (speakerMatch) {
                const [, speaker, utterance] = speakerMatch;
                conversation.push({
                    speaker: speaker.trim(),
                    text: utterance.trim(),
                    english: utterance.trim(), // To be translated if needed
                });
            }
        });

        return {
            scenario: 'conversation',
            conversation: conversation,
            speakers: [...new Set(conversation.map((c) => c.speaker))],
        };
    }

    /**
     * Check for sequence markers: "1.", "1)", "first", "then", "finally", etc.
     *
     * @param {string} text
     * @returns {boolean}
     */
    isSequence(text) {
        const sequenceIndicators = /^(\d+\.|\d+\))|first|then|next|after|finally|lastly/mi;
        return sequenceIndicators.test(text);
    }

    /**
     * Parse one step per non-blank line. A leading "N." or "N)" sets the
     * order; otherwise the line position (1-based) is used.
     *
     * @param {string} text
     * @returns {{title: string, steps: Array<object>}} Steps sorted by `order`.
     */
    parseSequence(text) {
        const steps = [];
        const lines = text.split(/\r?\n/).filter((line) => line.trim());

        lines.forEach((line, index) => {
            const stepMatch = line.trim().match(/^(?:(\d+)[\.\)]\s*)?(.+)$/);
            if (stepMatch) {
                const [, number, stepText] = stepMatch;
                steps.push({
                    order: number ? Number.parseInt(number, 10) : index + 1,
                    english: stepText.trim(),
                    french: '', // To be translated
                    index: index,
                });
            }
        });

        return {
            title: 'Sequence',
            steps: steps.sort((a, b) => a.order - b.order),
        };
    }

    /**
     * Split text into sentences on runs of ".", "!" or "?". Fragments of
     * three characters or fewer (after trimming) are discarded.
     *
     * @param {string} text
     * @returns {Array<{english: string, french: string, index: number, structure: object}>}
     */
    parseSentences(text) {
        const sentences = [];

        // Capture the terminator separately: `english` stays punctuation-free,
        // but the structure analysis still sees "?" and "!".
        for (const [, body, terminator] of text.matchAll(/([^.!?]+)([.!?]*)/g)) {
            if (body.trim().length > 3) {
                sentences.push({
                    english: body.trim(),
                    french: '', // To be translated
                    index: sentences.length,
                    structure: this.analyzeSentenceStructure(body + terminator),
                });
            }
        }

        return sentences;
    }

    /**
     * Simple language detection based on counts of common words.
     *
     * @param {string} text
     * @returns {'english'|'french'|'mixed'} 'mixed' when the counts are equal.
     */
    detectLanguage(text) {
        const englishWords = new Set(['the', 'and', 'is', 'in', 'to', 'of', 'a', 'that']);
        const frenchWords = new Set(['le', 'et', 'est', 'dans', 'de', 'la', 'que', 'un']);

        const words = text.toLowerCase().split(/\s+/);
        const englishCount = words.filter((w) => englishWords.has(w)).length;
        const frenchCount = words.filter((w) => frenchWords.has(w)).length;

        if (englishCount > frenchCount) return 'english';
        if (frenchCount > englishCount) return 'french';
        return 'mixed';
    }

    /**
     * Collect coarse structural indicators about the text.
     *
     * @param {string} text
     * @returns {{hasNumbers: boolean, hasColons: boolean, hasEquals: boolean,
     *   hasDashes: boolean, lineCount: number, avgWordsPerLine: number}}
     */
    analyzeStructure(text) {
        // split() always yields at least one element, so the division is safe.
        const lines = text.split('\n');
        const totalWords = lines.reduce((acc, line) => acc + this.countWords(line), 0);

        return {
            hasNumbers: /\d/.test(text),
            hasColons: text.includes(':'),
            hasEquals: text.includes('='),
            hasDashes: text.includes('-'),
            lineCount: lines.length,
            avgWordsPerLine: totalWords / lines.length,
        };
    }

    /**
     * Describe a single sentence.
     *
     * @param {string} sentence
     * @returns {{wordCount: number, hasQuestion: boolean, hasExclamation: boolean,
     *   complexity: 'complex'|'simple'}} 'complex' means more than 10 words.
     */
    analyzeSentenceStructure(sentence) {
        const wordCount = this.countWords(sentence);
        return {
            wordCount: wordCount,
            hasQuestion: sentence.includes('?'),
            hasExclamation: sentence.includes('!'),
            complexity: wordCount > 10 ? 'complex' : 'simple',
        };
    }
}

// === CSV PARSER ===

/**
 * Minimal CSV parser: first non-blank line is the header row, every other
 * non-blank line becomes one entry keyed by lower-cased header name.
 *
 * NOTE(review): quoted fields containing the separator are not supported;
 * lines are split on the raw separator.
 */
class CSVParser {
    /**
     * @param {string} csvText - CSV content (null/undefined is treated as '').
     * @param {object} [options]
     * @param {string} [options.separator] - Field separator; auto-detected when omitted.
     * @returns {Promise<{vocabulary: object[], headers: string[], format: string, separator: string}>}
     */
    async parse(csvText, options = {}) {
        console.log('📊 CSVParser - Analyse CSV');

        const source = typeof csvText === 'string' ? csvText : String(csvText ?? '');
        const separator = options.separator || this.detectSeparator(source);
        const lines = source.split(/\r?\n/).filter((line) => line.trim());

        // Empty input: return an empty result rather than crashing on lines[0].
        if (lines.length === 0) {
            return {
                vocabulary: [],
                headers: [],
                format: 'csv',
                separator: separator,
            };
        }

        const [headerLine, ...rows] = lines;
        const headers = headerLine.split(separator).map((h) => h.trim());

        const vocabulary = rows.map((row) => {
            const values = row.split(separator).map((v) => v.trim());
            const entry = {};
            headers.forEach((header, index) => {
                // Missing trailing cells become empty strings.
                entry[header.toLowerCase()] = values[index] || '';
            });
            return entry;
        });

        return {
            vocabulary: vocabulary,
            headers: headers,
            format: 'csv',
            separator: separator,
        };
    }

    /**
     * Pick the candidate separator that occurs most often in the first line.
     * Ties keep the earlier candidate; ',' is the default.
     *
     * @param {string} csvText
     * @returns {string} One of ',', ';', '\t', '|'.
     */
    detectSeparator(csvText) {
        const separators = [',', ';', '\t', '|'];
        const firstLine = csvText.split('\n')[0];

        let maxCount = 0;
        let bestSeparator = ',';

        separators.forEach((sep) => {
            const count = firstLine.split(sep).length - 1;
            if (count > maxCount) {
                maxCount = count;
                bestSeparator = sep;
            }
        });

        return bestSeparator;
    }
}

// === JSON PARSER ===

/**
 * Accepts a JSON string or an already-parsed value and tags the result.
 */
class JSONParser {
    /**
     * @param {string|object} jsonData - JSON text, or an already-parsed value.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<object>} Shallow copy of the data's own enumerable
     *   properties plus `format: 'json'` and `parsed: true`.
     * @throws {Error} When `jsonData` is a string that is not valid JSON.
     */
    async parse(jsonData, options = {}) {
        console.log('🔗 JSONParser - Analyse JSON');

        let data;
        if (typeof jsonData === 'string') {
            try {
                data = JSON.parse(jsonData);
            } catch (error) {
                throw new Error('JSON invalide: ' + error.message);
            }
        } else {
            data = jsonData;
        }

        // NOTE(review): spreading an array copies its indices as keys — confirm
        // whether top-level arrays are expected here.
        return {
            ...data,
            format: 'json',
            parsed: true,
        };
    }
}

// === SPECIALISED DIALOGUE PARSER ===

/**
 * Parses scripted dialogue with optional scene markers such as "[Scene]"
 * or "(Scene)".
 */
class DialogueParser {
    /**
     * @param {string} dialogueText
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{dialogue: boolean, scenes: string[], characters: string[],
     *   conversations: object[], format: string}>}
     */
    async parse(dialogueText, options = {}) {
        console.log('💬 DialogueParser - Analyse dialogue');

        const scenes = this.extractScenes(dialogueText);
        const characters = this.extractCharacters(dialogueText);
        const conversations = this.parseConversations(dialogueText);

        return {
            dialogue: true,
            scenes: scenes,
            characters: characters,
            conversations: conversations,
            format: 'dialogue',
        };
    }

    /**
     * Collect every bracketed or parenthesised span in the text, with the
     * delimiters stripped.
     *
     * @param {string} text
     * @returns {string[]}
     */
    extractScenes(text) {
        const sceneMatches = text.match(/\[([^\]]+)\]|\(([^)]+)\)/g) || [];
        return sceneMatches.map((match) => match.replace(/[\[\]()]/g, ''));
    }

    /**
     * Collect the unique names that appear before the first ":" on a line.
     * Names of 30 characters or more are ignored.
     *
     * @param {string} text
     * @returns {string[]}
     */
    extractCharacters(text) {
        const characterMatches = text.match(/^[^:\n]+:/gm) || [];
        const characters = new Set();

        characterMatches.forEach((match) => {
            const name = match.replace(':', '').trim();
            if (name.length > 0 && name.length < 30) {
                characters.add(name);
            }
        });

        return Array.from(characters);
    }

    /**
     * Walk the text line by line, tracking the current scene and collecting
     * "Speaker: utterance" lines.
     *
     * @param {string} text
     * @returns {Array<{scene: string, speaker: string, english: string,
     *   french: string, timestamp: number}>}
     */
    parseConversations(text) {
        const conversations = [];
        let currentScene = 'Scene 1';

        text.split(/\r?\n/).forEach((rawLine) => {
            const line = rawLine.trim();

            // A scene marker must START the line; otherwise a dialogue line
            // with an aside, e.g. "Alice: Hi (waves)", would be dropped.
            if (/^(?:\[[^\]]+\]|\([^)]+\))/.test(line)) {
                currentScene = line.replace(/[\[\]()]/g, '');
                return;
            }

            const dialogueMatch = line.match(/^([^:]+):\s*(.+)$/);
            if (dialogueMatch) {
                const [, speaker, utterance] = dialogueMatch;
                conversations.push({
                    scene: currentScene,
                    speaker: speaker.trim(),
                    english: utterance.trim(),
                    french: '', // To be translated
                    timestamp: conversations.length,
                });
            }
        });

        return conversations;
    }
}

// === SPECIALISED SEQUENCE PARSER ===

/**
 * Parses step-by-step sequences ("1. ...", "First ...", plain lines) with an
 * optional title on the first line.
 */
class SequenceParser {
    /**
     * @param {string} sequenceText
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{sequence: boolean, title: string, steps: object[],
     *   timeline: string[], format: string}>}
     */
    async parse(sequenceText, options = {}) {
        console.log('📋 SequenceParser - Analyse séquence');

        const title = this.extractTitle(sequenceText);
        const steps = this.extractSteps(sequenceText);
        const timeline = this.extractTimeline(sequenceText);

        return {
            sequence: true,
            title: title,
            steps: steps,
            timeline: timeline,
            format: 'sequence',
        };
    }

    /**
     * Use the first non-blank line as the title unless it starts with a digit.
     *
     * @param {string} text
     * @returns {string} The title, or 'Sequence' when none is found.
     */
    extractTitle(text) {
        // First NON-BLANK line, so this agrees with extractSteps, which skips
        // that same line as the title.
        const firstLine = text
            .split(/\r?\n/)
            .map((line) => line.trim())
            .find((line) => line.length > 0);

        if (firstLine !== undefined && !/^\d+/.test(firstLine)) {
            return firstLine;
        }

        return 'Sequence';
    }

    /**
     * Extract the steps, one per non-blank line, skipping a title line.
     *
     * @param {string} text
     * @returns {Array<{order: number, english: string, french: string,
     *   indicator: string, rawLine: string}>} Steps sorted by `order`.
     */
    extractSteps(text) {
        const stepPatterns = [
            /^(\d+)[\.\)]\s*(.+)$/, // "1. Step text"
            /^(First|Then|Next|After|Finally|Lastly)[:.]?\s*(.+)$/i, // "First: text"
            /^(.+)$/, // Fallback: any line
        ];

        const steps = [];
        const lines = text.split(/\r?\n/).filter((line) => line.trim());

        lines.forEach((rawLine, index) => {
            const line = rawLine.trim();

            // Skip the first line when it is the title.
            if (index === 0 && !/^\d+/.test(line)) {
                return;
            }

            for (const pattern of stepPatterns) {
                const match = line.match(pattern);
                if (match) {
                    let [, indicator, stepText] = match;

                    // The fallback pattern has one group: the whole line is
                    // the step text and the position is the indicator.
                    if (!stepText) {
                        stepText = indicator;
                        indicator = (steps.length + 1).toString();
                    }

                    steps.push({
                        order: this.normalizeStepNumber(indicator, steps.length + 1),
                        english: stepText.trim(),
                        french: '', // To be translated
                        indicator: indicator,
                        rawLine: rawLine,
                    });
                    break;
                }
            }
        });

        return steps.sort((a, b) => a.order - b.order);
    }

    /**
     * Convert a step indicator into a numeric order.
     *
     * @param {string} indicator - Digits ("3") or an ordering word ("first", "finally").
     * @param {number} fallback - Order used when the indicator has no fixed position.
     * @returns {number}
     */
    normalizeStepNumber(indicator, fallback) {
        if (/^\d+$/.test(indicator)) {
            return Number.parseInt(indicator, 10);
        }

        // A Map avoids inherited keys such as "constructor". "then", "next"
        // and "after" have no fixed position and use the fallback.
        const wordNumbers = new Map([
            ['first', 1],
            ['second', 2],
            ['third', 3],
            ['fourth', 4],
            ['fifth', 5],
            ['finally', 999],
            ['lastly', 999],
        ]);

        return wordNumbers.get(indicator.toLowerCase()) ?? fallback;
    }

    /**
     * Find time indications such as "7:00", "8pm", "morning" or "evening".
     *
     * @param {string} text
     * @returns {string[]} Matches in order of appearance.
     */
    extractTimeline(text) {
        const timeMatches = text.match(/\d{1,2}:\d{2}|\d{1,2}(am|pm)|morning|afternoon|evening|night/gi) || [];
        return timeMatches;
    }
}

// === MEDIA PARSER ===

/**
 * Sorts a list of media files into audio and image descriptors, based on
 * the file extension.
 */
class MediaParser {
    /**
     * @param {Array<string|object>} mediaData - Filenames, or objects with a
     *   `name` property. Anything that is not an array yields an empty result.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{audio: object[], images: object[], metadata: object, format: string}>}
     */
    async parse(mediaData, options = {}) {
        console.log('🎵 MediaParser - Analyse médias');

        const result = {
            audio: [],
            images: [],
            metadata: {},
            format: 'media',
        };

        if (Array.isArray(mediaData)) {
            mediaData.forEach((file) => {
                if (this.isAudioFile(file)) {
                    result.audio.push(this.parseAudioFile(file));
                } else if (this.isImageFile(file)) {
                    result.images.push(this.parseImageFile(file));
                }
            });
        }

        return result;
    }

    /**
     * Resolve the filename of an entry given as a string or as an object
     * with a `name` property.
     *
     * @param {string|object} file
     * @returns {string} The filename, or '' when none is available.
     */
    getFileName(file) {
        if (typeof file === 'string') {
            return file;
        }
        return typeof file?.name === 'string' ? file.name : '';
    }

    /**
     * @param {string|object} file
     * @returns {boolean} True for mp3, wav, ogg, m4a and flac (case-insensitive).
     */
    isAudioFile(file) {
        const audioExtensions = ['mp3', 'wav', 'ogg', 'm4a', 'flac'];
        const extension = this.getFileExtension(this.getFileName(file));
        return audioExtensions.includes(extension.toLowerCase());
    }

    /**
     * @param {string|object} file
     * @returns {boolean} True for jpg, jpeg, png, gif, webp and svg (case-insensitive).
     */
    isImageFile(file) {
        const imageExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'svg'];
        const extension = this.getFileExtension(this.getFileName(file));
        return imageExtensions.includes(extension.toLowerCase());
    }

    /**
     * @param {string} filename
     * @returns {string} Text after the last ".", or '' when there is no dot.
     */
    getFileExtension(filename) {
        // Without this guard a dot-less name such as "mp3" would be returned
        // whole and mistaken for an extension.
        if (!filename.includes('.')) {
            return '';
        }
        return filename.split('.').pop() || '';
    }

    /**
     * Build the descriptor of an audio file.
     *
     * @param {string|object} file
     * @returns {object}
     */
    parseAudioFile(file) {
        const name = this.getFileName(file);
        // String entries carry no path/size/duration.
        const info = typeof file === 'object' && file !== null ? file : {};

        return {
            name: name,
            path: info.path || name,
            type: 'audio',
            extension: this.getFileExtension(name),
            associatedWord: this.extractWordFromFilename(name),
            metadata: {
                size: info.size,
                duration: info.duration || null,
            },
        };
    }

    /**
     * Build the descriptor of an image file.
     *
     * @param {string|object} file
     * @returns {object}
     */
    parseImageFile(file) {
        const name = this.getFileName(file);
        // String entries carry no path/size/dimensions.
        const info = typeof file === 'object' && file !== null ? file : {};

        return {
            name: name,
            path: info.path || name,
            type: 'image',
            extension: this.getFileExtension(name),
            associatedWord: this.extractWordFromFilename(name),
            metadata: {
                size: info.size,
                width: info.width || null,
                height: info.height || null,
            },
        };
    }

    /**
     * Derive the word from a filename: "cat.mp3" -> "cat". Underscores and
     * dashes become spaces.
     *
     * @param {string} filename
     * @returns {string}
     */
    extractWordFromFilename(filename) {
        return filename.split('.')[0].replace(/[_-]/g, ' ').trim();
    }
}

// Global export. Guarded so that loading the file outside a browser does not throw.
if (typeof window !== 'undefined') {
    window.TextParser = TextParser;
    window.CSVParser = CSVParser;
    window.JSONParser = JSONParser;
    window.DialogueParser = DialogueParser;
    window.SequenceParser = SequenceParser;
    window.MediaParser = MediaParser;
}