Class_generator/Legacy/js/core/content-parsers.js

// === PARSERS UNIVERSELS POUR CONTENU ===

// === PARSER DE TEXTE LIBRE ===
/**
 * Heuristic parser for free-form text.
 *
 * The text is classified as exactly one of: vocabulary list, dialogue,
 * sequence of steps, or plain sentences (checked in that order), and the
 * matching field of the result is filled in.
 *
 * NOTE(review): because the vocabulary check runs first and accepts
 * "word: translation", a "Speaker: text" dialogue is still classified as a
 * vocabulary list. That precedence is inherited behaviour — confirm with the
 * callers before reordering the checks.
 */
class TextParser {
    /**
     * Classifies and parses a block of free text.
     *
     * @param {string} text - Text to analyse; null/undefined is treated as ''.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{rawText: string, vocabulary: object[], sentences: object[],
     *   dialogue: (object|null), sequence: (object|null), metadata: object}>}
     */
    async parse(text, options = {}) {
        logSh('📝 TextParser - Analyse du texte libre', 'INFO');

        // Tolerate null/undefined/non-string input instead of throwing on `.split`.
        const source = typeof text === 'string' ? text : String(text ?? '');

        const result = {
            rawText: source,
            vocabulary: [],
            sentences: [],
            dialogue: null,
            sequence: null,
            metadata: {
                wordCount: this.countWords(source),
                language: this.detectLanguage(source),
                structure: this.analyzeStructure(source)
            }
        };

        // Detect the content type; the first matching heuristic wins.
        if (this.isVocabularyList(source)) {
            result.vocabulary = this.parseVocabularyList(source);
        } else if (this.isDialogue(source)) {
            result.dialogue = this.parseDialogue(source);
        } else if (this.isSequence(source)) {
            result.sequence = this.parseSequence(source);
        } else {
            result.sentences = this.parseSentences(source);
        }

        return result;
    }

    /** Returns the trimmed, non-empty lines of `text` (trimming also drops a trailing "\r"). */
    splitLines(text) {
        return text
            .split('\n')
            .map(line => line.trim())
            .filter(Boolean);
    }

    /** Counts whitespace-separated tokens; an empty or blank string counts as 0. */
    countWords(text) {
        return (text.match(/\S+/g) || []).length;
    }

    /**
     * True when at least one line looks like "word = translation",
     * "word: translation" or "word - translation".
     *
     * The dash form requires surrounding whitespace so that hyphenated words
     * in ordinary prose ("well-known") are not mistaken for entries, and each
     * line is tested on its own so a separator cannot pair words from two
     * different lines. Letters are matched Unicode-aware so accented words
     * ("café = coffee") are recognised.
     */
    isVocabularyList(text) {
        const entryPattern = /[\p{L}\p{N}_]+\s*[=:]\s*[\p{L}\p{N}_]+|[\p{L}\p{N}_]+\s+[-–—]\s+[\p{L}\p{N}_]+/u;
        return text.split('\n').some(line => entryPattern.test(line));
    }

    /**
     * Parses "english <sep> french (category)" lines.
     *
     * @param {string} text
     * @returns {{english: string, french: string, category: string, index: number}[]}
     *   `index` is the position among the non-empty lines; `category`
     *   defaults to 'general'.
     */
    parseVocabularyList(text) {
        // Separator: "=" or ":" anywhere, or a dash surrounded by whitespace.
        const entryPattern = /^(.+?)\s*(?:[=:]|\s[-–—]\s)\s*(.+?)(?:\s*\((.+?)\))?$/;
        const vocabulary = [];

        this.splitLines(text).forEach((line, index) => {
            const matches = line.match(entryPattern);
            if (!matches) {
                return;
            }
            const [, english, french, category] = matches;
            vocabulary.push({
                english: english.trim(),
                french: french.trim(),
                category: category?.trim() || 'general',
                index
            });
        });

        return vocabulary;
    }

    /**
     * True when some line starts with an uppercase label followed by ":" on
     * the same line ("A:", "Person1:") or with a dash followed by an
     * uppercase letter ("- Alice").
     */
    isDialogue(text) {
        return /^\p{Lu}[^:\n]*:|^-[ \t]*\p{Lu}/mu.test(text);
    }

    /**
     * Parses "Speaker: utterance" lines (an optional leading dash is ignored).
     *
     * @param {string} text
     * @returns {{scenario: string, conversation: object[], speakers: string[]}}
     */
    parseDialogue(text) {
        const conversation = [];

        for (const line of this.splitLines(text)) {
            const speakerMatch = line.match(/^(?:-\s*)?([^:]+):\s*(.+)$/);
            if (!speakerMatch) {
                continue;
            }
            const [, speaker, utterance] = speakerMatch;
            conversation.push({
                speaker: speaker.trim(),
                text: utterance.trim(),
                english: utterance.trim() // To be translated if needed
            });
        }

        return {
            scenario: 'conversation',
            conversation,
            speakers: [...new Set(conversation.map(turn => turn.speaker))]
        };
    }

    /**
     * True when a line starts with "1." / "1)" or the text contains one of the
     * sequence keywords as a whole word (so "authentic" no longer counts as
     * "then", nor "afternoon" as "after").
     */
    isSequence(text) {
        return /^[ \t]*\d+[.)]|\b(?:first|then|next|after|finally|lastly)\b/mi.test(text);
    }

    /**
     * Turns every non-empty line into a step; an explicit "N." / "N)" prefix
     * sets the order, otherwise the line position (1-based) is used.
     *
     * @param {string} text
     * @returns {{title: string, steps: object[]}} Steps sorted by `order`.
     */
    parseSequence(text) {
        const steps = [];

        this.splitLines(text).forEach((line, index) => {
            const stepMatch = line.match(/^(?:(\d+)[.)]\s*)?(.+)$/);
            if (!stepMatch) {
                return;
            }
            const [, number, stepText] = stepMatch;
            steps.push({
                order: number ? Number.parseInt(number, 10) : index + 1,
                english: stepText.trim(),
                french: '', // To be translated
                index
            });
        });

        return {
            title: 'Sequence',
            steps: steps.sort((a, b) => a.order - b.order)
        };
    }

    /**
     * Splits text into sentences on runs of ".", "!" and "?".
     *
     * `english` excludes the terminators, but the terminators are kept for the
     * structure analysis so `hasQuestion` / `hasExclamation` can be detected.
     * Fragments of 3 characters or fewer are discarded.
     *
     * @param {string} text
     * @returns {{english: string, french: string, index: number, structure: object}[]}
     */
    parseSentences(text) {
        const chunks = text.match(/[^.!?]+[.!?]*/g) || [];

        return chunks
            .map(chunk => ({
                body: chunk.replace(/[.!?]+$/, '').trim(),
                withTerminator: chunk.trim()
            }))
            .filter(({ body }) => body.length > 3)
            .map(({ body, withTerminator }, index) => ({
                english: body,
                french: '', // To be translated
                index,
                structure: this.analyzeSentenceStructure(withTerminator)
            }));
    }

    /**
     * Naive language guess from a handful of very common words.
     *
     * Words are extracted as runs of letters, so attached punctuation
     * ("the,") does not hide a match.
     *
     * @param {string} text
     * @returns {'english'|'french'|'mixed'} 'mixed' when the counts are equal.
     */
    detectLanguage(text) {
        const englishWords = new Set(['the', 'and', 'is', 'in', 'to', 'of', 'a', 'that']);
        const frenchWords = new Set(['le', 'et', 'est', 'dans', 'de', 'la', 'que', 'un']);

        const words = text.toLowerCase().match(/\p{L}+/gu) || [];
        const englishCount = words.filter(word => englishWords.has(word)).length;
        const frenchCount = words.filter(word => frenchWords.has(word)).length;

        if (englishCount > frenchCount) return 'english';
        if (frenchCount > englishCount) return 'french';
        return 'mixed';
    }

    /**
     * Collects simple structural hints about the text.
     *
     * @param {string} text
     * @returns {{hasNumbers: boolean, hasColons: boolean, hasEquals: boolean,
     *   hasDashes: boolean, lineCount: number, avgWordsPerLine: number}}
     */
    analyzeStructure(text) {
        const lines = text.split('\n'); // always at least one element
        const totalWords = lines.reduce((sum, line) => sum + this.countWords(line), 0);

        return {
            hasNumbers: /\d/.test(text),
            hasColons: text.includes(':'),
            hasEquals: text.includes('='),
            hasDashes: text.includes('-'),
            lineCount: lines.length,
            avgWordsPerLine: totalWords / lines.length
        };
    }

    /**
     * Describes one sentence.
     *
     * @param {string} sentence - Sentence text, ideally including its terminator.
     * @returns {{wordCount: number, hasQuestion: boolean, hasExclamation: boolean,
     *   complexity: ('complex'|'simple')}} 'complex' means more than 10 words.
     */
    analyzeSentenceStructure(sentence) {
        // Drop trailing terminators first so a detached "?" is not counted as a word.
        const wordCount = this.countWords(sentence.replace(/[\s.!?]+$/, ''));

        return {
            wordCount,
            hasQuestion: sentence.includes('?'),
            hasExclamation: sentence.includes('!'),
            complexity: wordCount > 10 ? 'complex' : 'simple'
        };
    }
}

// === PARSER CSV ===
/**
 * Minimal delimiter-separated-values parser.
 *
 * The first non-empty line is the header row; every following non-empty line
 * becomes one object keyed by the lower-cased header names. Quoted fields are
 * NOT interpreted: a separator inside quotes still splits the field.
 */
class CSVParser {
    /**
     * @param {string} csvText - Raw CSV content; null/undefined is treated as ''.
     * @param {object} [options]
     * @param {string} [options.separator] - Field separator; auto-detected when omitted.
     * @returns {Promise<{vocabulary: object[], headers: string[], format: string, separator: string}>}
     *   Empty `vocabulary` and `headers` when the input has no non-empty line.
     */
    async parse(csvText, options = {}) {
        logSh('📊 CSVParser - Analyse CSV', 'INFO');

        const source = typeof csvText === 'string' ? csvText : String(csvText ?? '');
        const separator = options?.separator || this.detectSeparator(source);
        const lines = this.splitLines(source);

        // Nothing to parse: return an empty result instead of throwing on lines[0].
        if (lines.length === 0) {
            return {
                vocabulary: [],
                headers: [],
                format: 'csv',
                separator
            };
        }

        const [headerLine, ...dataLines] = lines;
        const headers = headerLine.split(separator).map(header => header.trim());

        const vocabulary = dataLines.map(line => {
            const values = line.split(separator).map(value => value.trim());
            const entry = {};

            headers.forEach((header, index) => {
                // Missing trailing columns become empty strings.
                entry[header.toLowerCase()] = values[index] ?? '';
            });

            return entry;
        });

        return {
            vocabulary,
            headers,
            format: 'csv',
            separator
        };
    }

    /** Splits on "\r\n", "\n" or a lone "\r" and drops blank lines. */
    splitLines(text) {
        return text.split(/\r\n|\r|\n/).filter(line => line.trim());
    }

    /**
     * Picks the candidate separator (",", ";", tab, "|") that occurs most often
     * in the first non-empty line. Ties keep the earlier candidate; "," is the
     * default when none occurs.
     *
     * @param {string} csvText
     * @returns {string}
     */
    detectSeparator(csvText) {
        const candidates = [',', ';', '\t', '|'];
        const firstLine = this.splitLines(String(csvText ?? ''))[0] ?? '';

        let bestSeparator = ',';
        let bestCount = 0;

        for (const candidate of candidates) {
            // split() yields one more piece than there are occurrences.
            const count = firstLine.split(candidate).length - 1;
            if (count > bestCount) {
                bestCount = count;
                bestSeparator = candidate;
            }
        }

        return bestSeparator;
    }
}

// === PARSER JSON ===
/**
 * Wraps JSON content in the common parser result envelope.
 */
class JSONParser {
    /**
     * Decodes `jsonData` when it is a string, otherwise uses it as-is, then
     * returns a shallow copy of it with `format: 'json'` and `parsed: true`
     * set (overriding same-named keys of the payload).
     *
     * @param {string|object} jsonData - JSON text or an already decoded value.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<object>}
     * @throws {Error} "JSON invalide: <reason>" when a string cannot be decoded.
     */
    async parse(jsonData, options = {}) {
        logSh('🔗 JSONParser - Analyse JSON', 'INFO');

        let payload = jsonData;

        if (typeof jsonData === 'string') {
            try {
                payload = JSON.parse(jsonData);
            } catch (parseFailure) {
                throw new Error(`JSON invalide: ${parseFailure.message}`);
            }
        }

        // Spread the envelope last so its two markers always win.
        const envelope = { format: 'json', parsed: true };
        return { ...payload, ...envelope };
    }
}

// === PARSER DIALOGUE SPÉCIALISÉ ===
/**
 * Parser for script-like dialogue text: optional scene markers in square
 * brackets or parentheses, and "Speaker: utterance" lines.
 */
class DialogueParser {
    /**
     * @param {string} dialogueText - Dialogue script; null/undefined is treated as ''.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{dialogue: boolean, scenes: string[], characters: string[],
     *   conversations: object[], format: string}>}
     */
    async parse(dialogueText, options = {}) {
        logSh('💬 DialogueParser - Analyse dialogue', 'INFO');

        const source = typeof dialogueText === 'string' ? dialogueText : String(dialogueText ?? '');

        return {
            dialogue: true,
            scenes: this.extractScenes(source),
            characters: this.extractCharacters(source),
            conversations: this.parseConversations(source),
            format: 'dialogue'
        };
    }

    /**
     * Decides whether a (trimmed) line is a scene marker rather than dialogue.
     *
     * A line is a scene line when it contains "[...]" or "(...)" and that
     * marker is not merely an aside inside an utterance: for a line shaped
     * like "Speaker: text", the marker must start before the first colon.
     * So "[Scene 2: Cafe]" and "Act one [Kitchen]" are scene lines, while
     * "Alice: Hello (waves)" stays a dialogue line.
     *
     * @param {string} line
     * @returns {boolean}
     */
    isSceneLine(line) {
        const marker = line.match(/\[([^\]]+)\]|\(([^)]+)\)/);
        if (!marker) {
            return false;
        }
        const spoken = line.match(/^([^:]+):\s*(.+)$/);
        // spoken[1] is everything before the first colon, so its length is the colon index.
        return !spoken || marker.index < spoken[1].length;
    }

    /**
     * Lists the bracketed/parenthesised markers found on scene lines, with the
     * bracket characters removed.
     *
     * @param {string} text
     * @returns {string[]}
     */
    extractScenes(text) {
        return text
            .split('\n')
            .map(line => line.trim())
            .filter(line => this.isSceneLine(line))
            .flatMap(line => line.match(/\[([^\]]+)\]|\(([^)]+)\)/g) || [])
            .map(marker => marker.replace(/[\[\]()]/g, ''));
    }

    /**
     * Collects the distinct labels that precede the first ":" on non-scene
     * lines. Labels of 30 characters or more are ignored.
     *
     * @param {string} text
     * @returns {string[]} Names in order of first appearance.
     */
    extractCharacters(text) {
        const characters = new Set();

        for (const rawLine of text.split('\n')) {
            const line = rawLine.trim();
            const colonIndex = line.indexOf(':');
            if (colonIndex <= 0 || this.isSceneLine(line)) {
                continue;
            }

            const name = line.slice(0, colonIndex).trim();
            if (name.length > 0 && name.length < 30) {
                characters.add(name);
            }
        }

        return Array.from(characters);
    }

    /**
     * Builds the ordered list of utterances, tagging each with the most
     * recent scene line (initially 'Scene 1').
     *
     * @param {string} text
     * @returns {{scene: string, speaker: string, english: string, french: string, timestamp: number}[]}
     *   `timestamp` is the utterance's position in the returned list.
     */
    parseConversations(text) {
        const conversations = [];
        let currentScene = 'Scene 1';

        for (const rawLine of text.split('\n')) {
            const line = rawLine.trim();

            // New scene: remember it (whole line, brackets stripped) and move on.
            if (this.isSceneLine(line)) {
                currentScene = line.replace(/[\[\]()]/g, '');
                continue;
            }

            const dialogueMatch = line.match(/^([^:]+):\s*(.+)$/);
            if (!dialogueMatch) {
                continue;
            }

            const [, speaker, utterance] = dialogueMatch;
            conversations.push({
                scene: currentScene,
                speaker: speaker.trim(),
                english: utterance.trim(),
                french: '', // To be translated
                timestamp: conversations.length
            });
        }

        return conversations;
    }
}

// === PARSER SÉQUENCE SPÉCIALISÉ ===
/**
 * Parser for ordered instructions: an optional title line followed by steps
 * that are numbered ("1.", "2)"), introduced by a sequence keyword
 * ("First", "Then", ...), or plain lines.
 */
class SequenceParser {
    /**
     * @param {string} sequenceText - Text to analyse; null/undefined is treated as ''.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{sequence: boolean, title: string, steps: object[],
     *   timeline: string[], format: string}>}
     */
    async parse(sequenceText, options = {}) {
        logSh('📋 SequenceParser - Analyse séquence', 'INFO');

        const source = typeof sequenceText === 'string' ? sequenceText : String(sequenceText ?? '');

        return {
            sequence: true,
            title: this.extractTitle(source),
            steps: this.extractSteps(source),
            timeline: this.extractTimeline(source),
            format: 'sequence'
        };
    }

    /**
     * True when a trimmed line opens like a step: it starts with a digit or
     * with a sequence keyword as a whole word. Used by both the title and the
     * step extraction so the two always agree on what the first line is.
     *
     * @param {string} line
     * @returns {boolean}
     */
    isStepLine(line) {
        return /^\d/.test(line) || /^(?:first|then|next|after|finally|lastly)\b/i.test(line);
    }

    /**
     * Returns the first non-empty line when it does not look like a step,
     * otherwise the default title 'Sequence'.
     *
     * @param {string} text
     * @returns {string}
     */
    extractTitle(text) {
        const firstLine = text
            .split('\n')
            .map(line => line.trim())
            .find(Boolean);

        if (firstLine && !this.isStepLine(firstLine)) {
            return firstLine;
        }

        return 'Sequence';
    }

    /**
     * Extracts the steps, one per non-empty line (the title line is skipped).
     *
     * Lines are trimmed before matching so indentation and a trailing "\r" do
     * not defeat the patterns.
     *
     * @param {string} text
     * @returns {{order: number, english: string, french: string, indicator: string, rawLine: string}[]}
     *   Steps sorted by `order`; `rawLine` is the untrimmed source line.
     */
    extractSteps(text) {
        const numberedPattern = /^(\d+)[.)]\s*(.+)$/;                                   // "1. Step text"
        const keywordPattern = /^(First|Then|Next|After|Finally|Lastly)\b[:.,]?\s*(.+)$/i; // "First: text"

        const steps = [];
        const rawLines = text.split(/\r?\n/).filter(line => line.trim());

        rawLines.forEach((rawLine, index) => {
            const line = rawLine.trim();

            // Skip the first line when it is the title.
            if (index === 0 && !this.isStepLine(line)) {
                return;
            }

            const position = steps.length + 1;
            const match = line.match(numberedPattern) || line.match(keywordPattern);

            // Fallback: any other line is a step numbered by its position.
            const indicator = match ? match[1] : String(position);
            const stepText = match ? match[2] : line;

            steps.push({
                order: this.normalizeStepNumber(indicator, position),
                english: stepText.trim(),
                french: '', // To be translated
                indicator,
                rawLine
            });
        });

        return steps.sort((a, b) => a.order - b.order);
    }

    /**
     * Converts a step indicator into a sortable number.
     *
     * Digits are parsed as base 10; ordinals "first".."fifth" map to 1..5;
     * "then" / "next" / "after" map to `fallback`; "finally" / "lastly" map
     * to 999 so they sort last; anything else yields `fallback`.
     *
     * @param {string} indicator
     * @param {number} fallback
     * @returns {number}
     */
    normalizeStepNumber(indicator, fallback) {
        const key = String(indicator).trim();

        if (/^\d+$/.test(key)) {
            return Number.parseInt(key, 10);
        }

        // A Map avoids accidental hits on Object.prototype names ("constructor").
        const wordNumbers = new Map([
            ['first', 1], ['second', 2], ['third', 3], ['fourth', 4], ['fifth', 5],
            ['then', fallback], ['next', fallback], ['after', fallback],
            ['finally', 999], ['lastly', 999]
        ]);

        return wordNumbers.get(key.toLowerCase()) ?? fallback;
    }

    /**
     * Finds time hints such as "7:00", "8pm" or "morning".
     *
     * @param {string} text
     * @returns {string[]} Matches in order of appearance, as written in the text.
     */
    extractTimeline(text) {
        return text.match(/\d{1,2}:\d{2}|\d{1,2}(am|pm)|morning|afternoon|evening|night/gi) || [];
    }
}

// === PARSER MÉDIA ===
/**
 * Sorts a list of media files into audio and image descriptors, using only
 * the file name extension.
 *
 * Entries may be plain file-name strings or objects with a `name` property
 * (plus optional `path`, `size`, `duration`, `width`, `height`).
 */
class MediaParser {
    /**
     * @param {Array|ArrayLike|Iterable} mediaData - Files to classify. Arrays,
     *   array-likes (objects with a numeric `length`) and iterables are
     *   accepted; anything else yields an empty result.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{audio: object[], images: object[], metadata: object, format: string}>}
     */
    async parse(mediaData, options = {}) {
        logSh('🎵 MediaParser - Analyse médias', 'INFO');

        const result = {
            audio: [],
            images: [],
            metadata: {},
            format: 'media'
        };

        for (const file of this.toFileArray(mediaData)) {
            if (this.isAudioFile(file)) {
                result.audio.push(this.parseAudioFile(file));
            } else if (this.isImageFile(file)) {
                result.images.push(this.parseImageFile(file));
            }
        }

        return result;
    }

    /** Converts arrays, array-likes and iterables to a real array; everything else to []. */
    toFileArray(mediaData) {
        if (Array.isArray(mediaData)) {
            return mediaData;
        }
        if (mediaData === null || typeof mediaData !== 'object') {
            return [];
        }

        const isIterable = typeof mediaData[Symbol.iterator] === 'function';
        const isArrayLike = Number.isInteger(mediaData.length) && mediaData.length >= 0;
        return isIterable || isArrayLike ? Array.from(mediaData) : [];
    }

    /** Returns the file name of a string or `{name}` entry, or '' when there is none. */
    getFileName(file) {
        if (typeof file === 'string') {
            return file;
        }
        return typeof file?.name === 'string' ? file.name : '';
    }

    /** Returns the part of a path after the last "/" or "\". */
    getBaseName(filename) {
        return filename.split(/[\\/]/).pop();
    }

    /**
     * @param {string|{name: string}} file
     * @returns {boolean} True for mp3, wav, ogg, m4a and flac (case-insensitive).
     */
    isAudioFile(file) {
        const audioExtensions = ['mp3', 'wav', 'ogg', 'm4a', 'flac'];
        const extension = this.getFileExtension(this.getFileName(file));
        return audioExtensions.includes(extension.toLowerCase());
    }

    /**
     * @param {string|{name: string}} file
     * @returns {boolean} True for jpg, jpeg, png, gif, webp and svg (case-insensitive).
     */
    isImageFile(file) {
        const imageExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'svg'];
        const extension = this.getFileExtension(this.getFileName(file));
        return imageExtensions.includes(extension.toLowerCase());
    }

    /**
     * Returns the text after the last "." of the file's base name, with its
     * original letter case, or '' when the name has no "." (or is not a string).
     *
     * @param {string} filename
     * @returns {string}
     */
    getFileExtension(filename) {
        if (typeof filename !== 'string') {
            return '';
        }
        const baseName = this.getBaseName(filename);
        const dotIndex = baseName.lastIndexOf('.');
        return dotIndex === -1 ? '' : baseName.slice(dotIndex + 1);
    }

    /** Builds the fields shared by audio and image descriptors. */
    describeFile(file, type) {
        const name = this.getFileName(file);
        // String entries carry no extra properties.
        const details = typeof file === 'object' && file !== null ? file : {};

        return {
            details,
            entry: {
                name,
                path: details.path || name,
                type,
                extension: this.getFileExtension(name),
                associatedWord: this.extractWordFromFilename(name)
            }
        };
    }

    /**
     * @param {string|object} file
     * @returns {{name: string, path: string, type: 'audio', extension: string,
     *   associatedWord: string, metadata: {size: *, duration: *}}}
     *   `duration` is null when the entry does not provide one.
     */
    parseAudioFile(file) {
        const { details, entry } = this.describeFile(file, 'audio');
        return {
            ...entry,
            metadata: {
                size: details.size,
                duration: details.duration ?? null
            }
        };
    }

    /**
     * @param {string|object} file
     * @returns {{name: string, path: string, type: 'image', extension: string,
     *   associatedWord: string, metadata: {size: *, width: *, height: *}}}
     *   `width` / `height` are null when the entry does not provide them.
     */
    parseImageFile(file) {
        const { details, entry } = this.describeFile(file, 'image');
        return {
            ...entry,
            metadata: {
                size: details.size,
                width: details.width ?? null,
                height: details.height ?? null
            }
        };
    }

    /**
     * Derives the word a file illustrates from its name: "cat.mp3" -> "cat",
     * "sounds/big_cat.mp3" -> "big cat". Directory parts are ignored, the name
     * is cut at its first ".", and "_" / "-" become spaces.
     *
     * @param {string} filename
     * @returns {string} '' when `filename` is not a string.
     */
    extractWordFromFilename(filename) {
        if (typeof filename !== 'string') {
            return '';
        }
        return this.getBaseName(filename).split('.')[0].replace(/[_-]/g, ' ').trim();
    }
}

// Global export: expose every parser class on the browser `window` object.
Object.assign(window, {
    TextParser,
    CSVParser,
    JSONParser,
    DialogueParser,
    SequenceParser,
    MediaParser
});