Class_generator/Legacy/js/core/content-parsers.js
StillHammer 38920cc858 Complete architectural rewrite with ultra-modular system
Major Changes:
- Moved legacy system to Legacy/ folder for archival
- Built new modular architecture with strict separation of concerns
- Created core system: Module, EventBus, ModuleLoader, Router
- Added Application bootstrap with auto-start functionality
- Implemented development server with ES6 modules support
- Created comprehensive documentation and project context
- Converted SBS-7-8 content to JSON format
- Copied all legacy games and content to new structure

New Architecture Features:
- Sealed modules with WeakMap private data
- Strict dependency injection system
- Event-driven communication only
- Inviolable responsibility patterns
- Auto-initialization without commands
- Component-based UI foundation ready

Technical Stack:
- Vanilla JS/HTML/CSS only
- ES6 modules with proper imports/exports
- HTTP development server (no file:// protocol)
- Modular CSS with component scoping
- Comprehensive error handling and debugging

Ready for Phase 2: Converting legacy modules to new architecture

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-22 07:08:39 +08:00

485 lines
15 KiB
JavaScript

// === PARSERS UNIVERSELS POUR CONTENU ===
// === PARSER DE TEXTE LIBRE ===
/**
 * Heuristic parser for free-form text.
 *
 * The text is classified as a vocabulary list, a dialogue, a sequence of
 * steps or plain sentences, and the matching structure is extracted.
 * Line-based parsing accepts both LF and CRLF line endings.
 */
class TextParser {
    /**
     * Analyse a text and return the structure that was detected.
     *
     * Detectors run in a fixed order (vocabulary, dialogue, sequence) and the
     * first match wins; when none matches, the text is split into sentences.
     * NOTE: the vocabulary detector also accepts lines such as `Alice: Hello`,
     * so colon-separated dialogue is reported as vocabulary.
     *
     * @param {string} text - Raw text to analyse.
     * @param {object} [options] - Accepted but not read by this parser.
     * @returns {Promise<object>} Object with `rawText`, `vocabulary`,
     *   `sentences`, `dialogue`, `sequence` and `metadata`.
     */
    async parse(text, options = {}) {
        logSh('📝 TextParser - Analyse du texte libre', 'INFO');
        const result = {
            rawText: text,
            vocabulary: [],
            sentences: [],
            dialogue: null,
            sequence: null,
            metadata: {
                wordCount: this.countWords(text),
                language: this.detectLanguage(text),
                structure: this.analyzeStructure(text)
            }
        };

        // Detect the content type; only one of the result fields is filled.
        if (this.isVocabularyList(text)) {
            result.vocabulary = this.parseVocabularyList(text);
        } else if (this.isDialogue(text)) {
            result.dialogue = this.parseDialogue(text);
        } else if (this.isSequence(text)) {
            result.sequence = this.parseSequence(text);
        } else {
            result.sentences = this.parseSentences(text);
        }
        return result;
    }

    /**
     * Split a text into trimmed, non-empty lines.
     * Splitting on `\r?\n` matters: a trailing `\r` is not matched by `.`,
     * which would make the `$`-anchored line patterns below fail.
     *
     * @param {string} text
     * @returns {string[]}
     */
    splitLines(text) {
        return text
            .split(/\r?\n/)
            .map(line => line.trim())
            .filter(line => line !== '');
    }

    /**
     * Count whitespace-separated tokens; empty or blank text counts as 0.
     *
     * @param {string} text
     * @returns {number}
     */
    countWords(text) {
        const trimmed = text.trim();
        return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
    }

    /**
     * True when the text contains a "word = translation",
     * "word: translation" or "word - translation" pair.
     *
     * @param {string} text
     * @returns {boolean}
     */
    isVocabularyList(text) {
        return /\w+\s*[=:-]\s*\w+/.test(text);
    }

    /**
     * Parse "english <sep> french (category)" lines, where the separator is
     * `=`, `:` or `-` and the parenthesised category is optional.
     * Lines that do not match are skipped; `index` is the position of the
     * line among the non-empty lines.
     *
     * @param {string} text
     * @returns {Array<{english: string, french: string, category: string, index: number}>}
     */
    parseVocabularyList(text) {
        const vocabulary = [];
        this.splitLines(text).forEach((line, index) => {
            const matches = line.match(/(.+?)\s*[=:-]\s*(.+?)(?:\s*\((.+?)\))?$/);
            if (!matches) {
                return;
            }
            const [, english, french, category] = matches;
            vocabulary.push({
                english: english.trim(),
                french: french.trim(),
                category: category?.trim() || 'general',
                index: index
            });
        });
        return vocabulary;
    }

    /**
     * True when a line starts with an upper-case "Speaker:" prefix or with
     * a dash followed by an upper-case letter.
     *
     * @param {string} text
     * @returns {boolean}
     */
    isDialogue(text) {
        return /^[A-Z][^:]*:|^-\s*[A-Z]/m.test(text);
    }

    /**
     * Parse "Speaker: text" lines (an optional leading dash is ignored).
     *
     * @param {string} text
     * @returns {{scenario: string, conversation: object[], speakers: string[]}}
     */
    parseDialogue(text) {
        const conversation = [];
        this.splitLines(text).forEach(line => {
            const speakerMatch = line.match(/^(?:-\s*)?([^:]+):\s*(.+)$/);
            if (!speakerMatch) {
                return;
            }
            const [, speaker, spoken] = speakerMatch;
            conversation.push({
                speaker: speaker.trim(),
                text: spoken.trim(),
                english: spoken.trim() // To be translated if necessary
            });
        });
        return {
            scenario: 'conversation',
            conversation: conversation,
            speakers: [...new Set(conversation.map(entry => entry.speaker))]
        };
    }

    /**
     * True when a line starts with "1." / "1)" or the text contains one of
     * the ordering words as a whole word (so "afternoon" does not count as
     * "after").
     *
     * @param {string} text
     * @returns {boolean}
     */
    isSequence(text) {
        const sequenceIndicators = /^(\d+\.|\d+\))|\b(?:first|then|next|after|finally|lastly)\b/mi;
        return sequenceIndicators.test(text);
    }

    /**
     * Parse one step per non-empty line. A leading "N." or "N)" sets the
     * step order; otherwise the 1-based line position is used. Steps are
     * returned sorted by order.
     *
     * @param {string} text
     * @returns {{title: string, steps: object[]}}
     */
    parseSequence(text) {
        const steps = [];
        this.splitLines(text).forEach((line, index) => {
            const stepMatch = line.match(/^(?:(\d+)[.)]\s*)?(.+)$/);
            if (!stepMatch) {
                return;
            }
            const [, number, stepText] = stepMatch;
            steps.push({
                order: number ? Number.parseInt(number, 10) : index + 1,
                english: stepText.trim(),
                french: '', // To be translated
                index: index
            });
        });
        return {
            title: 'Sequence',
            steps: steps.sort((a, b) => a.order - b.order)
        };
    }

    /**
     * Split the text into sentences on runs of `.`, `!` and `?`.
     * Fragments of three characters or fewer are dropped. `english` holds
     * the sentence without its terminator, while the structure analysis
     * receives the sentence with its terminator so that questions and
     * exclamations can be recognised.
     *
     * @param {string} text
     * @returns {Array<{english: string, french: string, index: number, structure: object}>}
     */
    parseSentences(text) {
        const chunks = text.match(/[^.!?]+[.!?]*/g) || [];
        return chunks
            .map(chunk => ({
                full: chunk.trim(),
                body: chunk.replace(/[.!?]+$/, '').trim()
            }))
            .filter(({ body }) => body.length > 3)
            .map(({ full, body }, index) => ({
                english: body,
                french: '', // To be translated
                index: index,
                structure: this.analyzeSentenceStructure(full)
            }));
    }

    /**
     * Naive language guess: counts a few common English and French words
     * among the whitespace-separated tokens.
     *
     * @param {string} text
     * @returns {'english'|'french'|'mixed'} 'mixed' when the counts are equal.
     */
    detectLanguage(text) {
        const englishWords = new Set(['the', 'and', 'is', 'in', 'to', 'of', 'a', 'that']);
        const frenchWords = new Set(['le', 'et', 'est', 'dans', 'de', 'la', 'que', 'un']);
        let englishCount = 0;
        let frenchCount = 0;
        for (const word of text.toLowerCase().split(/\s+/)) {
            if (englishWords.has(word)) englishCount += 1;
            if (frenchWords.has(word)) frenchCount += 1;
        }
        if (englishCount > frenchCount) return 'english';
        if (frenchCount > englishCount) return 'french';
        return 'mixed';
    }

    /**
     * Collect simple structural hints about the text.
     * `lineCount` includes blank lines; `avgWordsPerLine` is the total word
     * count divided by `lineCount`.
     *
     * @param {string} text
     * @returns {object}
     */
    analyzeStructure(text) {
        const lines = text.split(/\r?\n/);
        const totalWords = lines.reduce((sum, line) => sum + this.countWords(line), 0);
        return {
            hasNumbers: /\d/.test(text),
            hasColons: text.includes(':'),
            hasEquals: text.includes('='),
            hasDashes: text.includes('-'),
            lineCount: lines.length,
            avgWordsPerLine: totalWords / lines.length
        };
    }

    /**
     * Describe a single sentence; more than 10 words counts as 'complex'.
     *
     * @param {string} sentence
     * @returns {{wordCount: number, hasQuestion: boolean, hasExclamation: boolean, complexity: string}}
     */
    analyzeSentenceStructure(sentence) {
        const wordCount = this.countWords(sentence);
        return {
            wordCount: wordCount,
            hasQuestion: sentence.includes('?'),
            hasExclamation: sentence.includes('!'),
            complexity: wordCount > 10 ? 'complex' : 'simple'
        };
    }
}
// === PARSER CSV ===
/**
 * Minimal CSV parser: the first non-blank line is the header row and every
 * following non-blank line becomes one entry keyed by the lower-cased
 * header names. Fields are split on every separator occurrence; quoted
 * fields are not interpreted.
 */
class CSVParser {
    /**
     * Parse CSV text into a list of entries.
     *
     * @param {string} csvText - CSV content.
     * @param {object} [options]
     * @param {string} [options.separator] - Field separator; auto-detected
     *   from the first line when omitted.
     * @returns {Promise<{vocabulary: object[], headers: string[], format: string, separator: string}>}
     *   Empty `vocabulary` and `headers` when the text has no non-blank line.
     */
    async parse(csvText, options = {}) {
        logSh('📊 CSVParser - Analyse CSV', 'INFO');
        const separator = options.separator || this.detectSeparator(csvText);
        const lines = csvText.split('\n').filter(line => line.trim());

        // Without this guard an empty file would fail on the missing header row.
        if (lines.length === 0) {
            return {
                vocabulary: [],
                headers: [],
                format: 'csv',
                separator: separator
            };
        }

        const [headerLine, ...dataLines] = lines;
        const headers = headerLine.split(separator).map(header => header.trim());
        const keys = headers.map(header => header.toLowerCase());

        const vocabulary = dataLines.map(line => {
            // trim() also removes the trailing "\r" of CRLF files.
            const values = line.split(separator).map(value => value.trim());
            const entry = {};
            keys.forEach((key, column) => {
                // Missing trailing columns become empty strings.
                entry[key] = values[column] || '';
            });
            return entry;
        });

        return {
            vocabulary: vocabulary,
            headers: headers,
            format: 'csv',
            separator: separator
        };
    }

    /**
     * Pick the candidate separator (`,`, `;`, tab, `|`) that occurs most
     * often in the first line. Ties keep the earlier candidate, and `,` is
     * returned when none occurs.
     *
     * @param {string} csvText
     * @returns {string}
     */
    detectSeparator(csvText) {
        const candidates = [',', ';', '\t', '|'];
        const firstLine = csvText.split('\n')[0];
        let bestSeparator = ',';
        let maxCount = 0;
        for (const candidate of candidates) {
            const count = firstLine.split(candidate).length - 1;
            if (count > maxCount) {
                maxCount = count;
                bestSeparator = candidate;
            }
        }
        return bestSeparator;
    }
}
// === PARSER JSON ===
/**
 * Parser for JSON content supplied either as a string or as an
 * already-decoded value.
 */
class JSONParser {
    /**
     * Decode the input when it is a string and return a shallow copy of the
     * data tagged with `format: 'json'` and `parsed: true` (these two keys
     * override same-named keys of the data).
     *
     * @param {string|object} jsonData - JSON text or decoded data.
     * @param {object} [options] - Accepted but not read by this parser.
     * @returns {Promise<object>}
     * @throws {Error} "JSON invalide: …" when a string input cannot be decoded.
     */
    async parse(jsonData, options = {}) {
        logSh('🔗 JSONParser - Analyse JSON', 'INFO');

        let decoded = jsonData;
        if (typeof jsonData === 'string') {
            try {
                decoded = JSON.parse(jsonData);
            } catch (parseError) {
                throw new Error(`JSON invalide: ${parseError.message}`);
            }
        }

        // Object spread is kept on purpose: it copies own enumerable
        // properties as plain data properties.
        return { ...decoded, format: 'json', parsed: true };
    }
}
// === PARSER DIALOGUE SPÉCIALISÉ ===
/**
 * Parser for scripted dialogue: "Speaker: text" lines, optionally grouped
 * by scene markers written as "[...]" or "(...)".
 */
class DialogueParser {
    /**
     * Extract scenes, characters and the ordered conversation lines.
     *
     * @param {string} dialogueText - Dialogue script.
     * @param {object} [options] - Accepted but not read by this parser.
     * @returns {Promise<{dialogue: boolean, scenes: string[], characters: string[], conversations: object[], format: string}>}
     */
    async parse(dialogueText, options = {}) {
        logSh('💬 DialogueParser - Analyse dialogue', 'INFO');
        return {
            dialogue: true,
            scenes: this.extractScenes(dialogueText),
            characters: this.extractCharacters(dialogueText),
            conversations: this.parseConversations(dialogueText),
            format: 'dialogue'
        };
    }

    /**
     * True when the (trimmed) line begins with a "[...]" or "(...)" group.
     *
     * @param {string} line
     * @returns {boolean}
     */
    startsWithMarker(line) {
        return /^(?:\[[^\]]+\]|\([^)]+\))/.test(line);
    }

    /**
     * Collect the content of every "[...]" and "(...)" span in the text,
     * including spans that appear inside a dialogue line.
     *
     * @param {string} text
     * @returns {string[]}
     */
    extractScenes(text) {
        const sceneMatches = text.match(/\[([^\]]+)\]|\(([^)]+)\)/g) || [];
        return sceneMatches.map(match => match.replace(/[\[\]()]/g, ''));
    }

    /**
     * Collect the distinct names that precede the first ":" of a line.
     * Names of 30 characters or more are ignored, as are prefixes that begin
     * with "[" or "(" (scene headers such as "[Scene 2: Park]").
     *
     * @param {string} text
     * @returns {string[]}
     */
    extractCharacters(text) {
        const prefixes = text.match(/^[^:\n]+:/gm) || [];
        const characters = new Set();
        for (const prefix of prefixes) {
            const name = prefix.replace(':', '').trim();
            const isSceneHeader = name.startsWith('[') || name.startsWith('(');
            if (name.length > 0 && name.length < 30 && !isSceneHeader) {
                characters.add(name);
            }
        }
        return Array.from(characters);
    }

    /**
     * Walk the text line by line and build the list of spoken lines, each
     * tagged with the scene that was current when it was spoken.
     *
     * A line that begins with a bracketed/parenthesised group is a scene
     * marker. Otherwise a "Speaker: text" line is dialogue, even when the
     * text contains an aside such as "(waving)". Any other line containing
     * a bracketed group is still treated as a scene marker.
     *
     * @param {string} text
     * @returns {Array<{scene: string, speaker: string, english: string, french: string, timestamp: number}>}
     */
    parseConversations(text) {
        const conversations = [];
        let currentScene = 'Scene 1';

        for (const rawLine of text.split('\n')) {
            const line = rawLine.trim();

            const dialogueMatch = this.startsWithMarker(line)
                ? null
                : line.match(/^([^:]+):\s*(.+)$/);

            if (dialogueMatch) {
                const [, speaker, spoken] = dialogueMatch;
                conversations.push({
                    scene: currentScene,
                    speaker: speaker.trim(),
                    english: spoken.trim(),
                    french: '', // To be translated
                    timestamp: conversations.length
                });
                continue;
            }

            // New scene: keep the whole line, minus the bracket characters.
            if (/\[([^\]]+)\]|\(([^)]+)\)/.test(line)) {
                currentScene = line.replace(/[\[\]()]/g, '');
            }
        }
        return conversations;
    }
}
// === PARSER SÉQUENCE SPÉCIALISÉ ===
/**
 * Parser for ordered instructions: an optional title line followed by one
 * step per line ("1. text", "2) text", "First: text", or free text).
 * Both LF and CRLF line endings are accepted.
 */
class SequenceParser {
    /**
     * Extract the title, the ordered steps and any time expressions.
     *
     * @param {string} sequenceText - Sequence description.
     * @param {object} [options] - Accepted but not read by this parser.
     * @returns {Promise<{sequence: boolean, title: string, steps: object[], timeline: string[], format: string}>}
     */
    async parse(sequenceText, options = {}) {
        logSh('📋 SequenceParser - Analyse séquence', 'INFO');
        return {
            sequence: true,
            title: this.extractTitle(sequenceText),
            steps: this.extractSteps(sequenceText),
            timeline: this.extractTimeline(sequenceText),
            format: 'sequence'
        };
    }

    /**
     * Split on LF/CRLF and drop blank lines; lines are returned untrimmed.
     *
     * @param {string} text
     * @returns {string[]}
     */
    splitLines(text) {
        return text.split(/\r?\n/).filter(line => line.trim() !== '');
    }

    /**
     * Return the first non-blank line as the title unless it starts with a
     * digit; otherwise (or when the text has no content) return 'Sequence'.
     * This is the same line that `extractSteps` skips, so a title is never
     * also reported as a step. Note that a first line beginning with an
     * ordering word ("First: ...") is still taken as the title.
     *
     * @param {string} text
     * @returns {string}
     */
    extractTitle(text) {
        const [firstLine = ''] = this.splitLines(text);
        const candidate = firstLine.trim();
        if (candidate !== '' && !/^\d/.test(candidate)) {
            return candidate;
        }
        return 'Sequence';
    }

    /**
     * Build one step per non-blank line (the title line excluded) and
     * return the steps sorted by `order`.
     *
     * Recognised forms, tried in this order:
     *   1. "N. text" / "N) text"            -> order N
     *   2. "<ordinal>[:.,] text"            -> order from normalizeStepNumber
     *   3. anything else                    -> order is the running position
     * `rawLine` keeps the line as written (without its line terminator).
     *
     * @param {string} text
     * @returns {Array<{order: number, english: string, french: string, indicator: string, rawLine: string}>}
     */
    extractSteps(text) {
        const numberedPattern = /^(\d+)[.)]\s*(.+)$/;
        // \b keeps "Afternoon" from being read as "After" + "noon".
        const ordinalPattern = /^(First|Second|Third|Fourth|Fifth|Then|Next|After|Finally|Lastly)\b[:.,]?\s*(.+)$/i;

        const steps = [];
        this.splitLines(text).forEach((rawLine, index) => {
            const line = rawLine.trim();

            // Skip the first line when it is the title.
            if (index === 0 && !/^\d/.test(line)) {
                return;
            }

            const position = steps.length + 1;
            const match = line.match(numberedPattern) || line.match(ordinalPattern);
            const indicator = match ? match[1] : position.toString();
            const stepText = match ? match[2] : line;

            steps.push({
                order: this.normalizeStepNumber(indicator, position),
                english: stepText.trim(),
                french: '', // To be translated
                indicator: indicator,
                rawLine: rawLine
            });
        });
        return steps.sort((a, b) => a.order - b.order);
    }

    /**
     * Convert a step indicator to a sort position.
     *
     * @param {string} indicator - Digits ("3") or an ordering word.
     * @param {number} fallback - Position used for "then"/"next"/"after"
     *   and for unknown indicators.
     * @returns {number} The parsed number, 1-5 for "first".."fifth", 999
     *   for "finally"/"lastly", otherwise `fallback`.
     */
    normalizeStepNumber(indicator, fallback) {
        if (/^\d+$/.test(indicator)) {
            return Number.parseInt(indicator, 10);
        }
        // A Map avoids matching inherited object members such as "constructor".
        const wordNumbers = new Map([
            ['first', 1], ['second', 2], ['third', 3], ['fourth', 4], ['fifth', 5],
            ['finally', 999], ['lastly', 999]
        ]);
        return wordNumbers.get(String(indicator).toLowerCase()) ?? fallback;
    }

    /**
     * Find time expressions such as "7:00", "8pm" or "morning".
     *
     * @param {string} text
     * @returns {string[]} Matches in order of appearance, as written.
     */
    extractTimeline(text) {
        return text.match(/\d{1,2}:\d{2}|\d{1,2}(am|pm)|morning|afternoon|evening|night/gi) || [];
    }
}
// === PARSER MÉDIA ===
/**
 * Classifies a list of media files into audio and image entries based on
 * their file extension. Entries may be objects with a `name` property or
 * plain file-name strings.
 */
class MediaParser {
    /**
     * Sort the given files into `audio` and `images`; anything else
     * (including a non-array input) is ignored.
     *
     * @param {Array<object|string>} mediaData - Files or file names.
     * @param {object} [options] - Accepted but not read by this parser.
     * @returns {Promise<{audio: object[], images: object[], metadata: object, format: string}>}
     */
    async parse(mediaData, options = {}) {
        logSh('🎵 MediaParser - Analyse médias', 'INFO');
        const result = {
            audio: [],
            images: [],
            metadata: {},
            format: 'media'
        };
        if (!Array.isArray(mediaData)) {
            return result;
        }
        for (const file of mediaData) {
            if (this.isAudioFile(file)) {
                result.audio.push(this.parseAudioFile(file));
            } else if (this.isImageFile(file)) {
                result.images.push(this.parseImageFile(file));
            }
        }
        return result;
    }

    /**
     * Return the file name of an entry: the string itself, or its `name`
     * property; '' when neither is a string.
     *
     * @param {object|string} file
     * @returns {string}
     */
    resolveName(file) {
        if (typeof file === 'string') {
            return file;
        }
        return typeof file?.name === 'string' ? file.name : '';
    }

    /**
     * View an entry as an object, so string entries can be described the
     * same way as file objects.
     *
     * @param {object|string} file
     * @returns {object}
     */
    normalizeFile(file) {
        return typeof file === 'string' ? { name: file } : file;
    }

    /**
     * @param {object|string} file
     * @returns {boolean} True for mp3, wav, ogg, m4a and flac (case-insensitive).
     */
    isAudioFile(file) {
        const audioExtensions = ['mp3', 'wav', 'ogg', 'm4a', 'flac'];
        const extension = this.getFileExtension(this.resolveName(file));
        return audioExtensions.includes(extension.toLowerCase());
    }

    /**
     * @param {object|string} file
     * @returns {boolean} True for jpg, jpeg, png, gif, webp and svg (case-insensitive).
     */
    isImageFile(file) {
        const imageExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'svg'];
        const extension = this.getFileExtension(this.resolveName(file));
        return imageExtensions.includes(extension.toLowerCase());
    }

    /**
     * Return the text after the last "." of the name, or '' when the name
     * has no "." (so a file simply called "mp3" has no extension).
     *
     * @param {string} filename
     * @returns {string}
     */
    getFileExtension(filename) {
        if (typeof filename !== 'string') {
            return '';
        }
        const dotIndex = filename.lastIndexOf('.');
        return dotIndex === -1 ? '' : filename.slice(dotIndex + 1);
    }

    /**
     * Fields shared by audio and image entries.
     *
     * @param {object} source - Normalized file object.
     * @param {string} type - 'audio' or 'image'.
     * @returns {object}
     */
    describeFile(source, type) {
        return {
            name: source.name,
            path: source.path || source.name,
            type: type,
            extension: this.getFileExtension(source.name),
            associatedWord: this.extractWordFromFilename(source.name)
        };
    }

    /**
     * @param {object|string} file
     * @returns {object} Audio entry; `metadata.duration` is null when unknown.
     */
    parseAudioFile(file) {
        const source = this.normalizeFile(file);
        return {
            ...this.describeFile(source, 'audio'),
            metadata: {
                size: source.size,
                duration: source.duration || null
            }
        };
    }

    /**
     * @param {object|string} file
     * @returns {object} Image entry; `metadata.width`/`height` are null when unknown.
     */
    parseImageFile(file) {
        const source = this.normalizeFile(file);
        return {
            ...this.describeFile(source, 'image'),
            metadata: {
                size: source.size,
                width: source.width || null,
                height: source.height || null
            }
        };
    }

    /**
     * Derive the associated word from a file name: "cat.mp3" -> "cat",
     * "big_dog.png" -> "big dog". Any leading directory part is removed
     * first, and the text before the first "." is used.
     *
     * @param {string} filename
     * @returns {string}
     */
    extractWordFromFilename(filename) {
        if (typeof filename !== 'string') {
            return '';
        }
        const baseName = filename.split(/[\\/]/).pop();
        return baseName.split('.')[0].replace(/[_-]/g, ' ').trim();
    }
}
// Global export: attach every parser class to the `window` object.
Object.assign(window, {
    TextParser,
    CSVParser,
    JSONParser,
    DialogueParser,
    SequenceParser,
    MediaParser,
});