Added plan.md with complete architecture for format-agnostic content generation: - Support for Markdown, HTML, Plain Text, JSON formats - New FormatExporter module with neutral data structure - Integration strategy with existing ContentAssembly and ArticleStorage - Bonus features: SEO metadata generation, readability scoring, WordPress Gutenberg format - Implementation roadmap with 4 phases (6h total estimated) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
425 lines
14 KiB
JavaScript
425 lines
14 KiB
JavaScript
// ========================================
// FICHIER: lib/element-extraction.js - CONVERTI POUR NODE.JS
// Description: Extraction et parsing des éléments XML
// ========================================

// 🔄 NODE.JS IMPORTS
const { logSh } = require('./ErrorReporting');

// ============= EXTRACTION PRINCIPALE =============

/**
 * Scans a template for `|...|` delimited tags and describes each one.
 *
 * For every tag, the text before the first `{` becomes the tag name, every
 * `{{...}}` group is collected as a variable and resolved through
 * `resolveVariablesContent`, and the first single-brace `{...}` group that
 * remains once the variables are removed is kept as the instructions.
 *
 * @param {string} xmlTemplate - Template text containing `|Name{{VAR}}{instructions}|` tags.
 * @param {Object} csvData - Data handed to `resolveVariablesContent`.
 * @returns {Promise<Array<Object>>} One descriptor per tag, in template order.
 *   If anything throws, the error is logged and an empty array is returned.
 */
async function extractElements(xmlTemplate, csvData) {
  // Builds the descriptor for the text found between two pipes,
  // e.g. "Titre_H1_1{{T0}}" or "Titre_H3_3{{MC+1_3}}".
  const describeTag = (rawTag) => {
    const leadingName = rawTag.match(/^([^{]+)/);
    const variables = rawTag.match(/\{\{([^}]+)\}\}/g);

    // The {{variables}} are removed before looking for {instructions} so the
    // single-brace pattern cannot match inside a double-brace group.
    const instructionSource = rawTag.replace(/\{\{[^}]+\}\}/g, '');
    const instructionHit = instructionSource.match(/\{([^}]+)\}/);

    const tagName = leadingName ? leadingName[1].trim() : rawTag.split('{')[0];
    const resolvedContent = resolveVariablesContent(variables, csvData);

    return {
      originalTag: `|${tagName}|`, // bare tag, without variables or instructions
      name: tagName,
      variables: variables || [],
      resolvedContent: resolvedContent,
      instructions: instructionHit ? instructionHit[1] : null,
      type: getElementType(tagName),
      originalFullMatch: rawTag, // untouched tag body, kept as a backup
    };
  };

  try {
    await logSh('Extraction éléments avec séparation tag/contenu...', 'DEBUG');

    const tagPattern = /\|([^|]+)\|/g;
    const collected = [];

    for (
      let hit = tagPattern.exec(xmlTemplate);
      hit !== null;
      hit = tagPattern.exec(xmlTemplate)
    ) {
      const descriptor = describeTag(hit[1]);
      collected.push(descriptor);

      await logSh(
        `Tag séparé: ${descriptor.originalTag} → "${descriptor.resolvedContent}"`,
        'DEBUG'
      );
    }

    await logSh(`${collected.length} éléments extraits avec séparation`, 'INFO');
    return collected;
  } catch (failure) {
    await logSh(`Erreur extractElements: ${failure}`, 'ERROR');
    return [];
  }
}

// ============= RÉSOLUTION VARIABLES - IDENTIQUE =============

/**
 * Resolves a list of `{{...}}` placeholders against the CSV row data and
 * returns the concatenation of the resolved values (no separator).
 *
 * Supported placeholders:
 *   - `T0`, `MC0`, `T-1`, `L-1`  -> `csvData.t0`, `.mc0`, `.tMinus1`, `.lMinus1`
 *   - `MC+1_n`, `T+1_n`, `L+1_n` -> n-th (1-based) item of the comma-separated
 *     `csvData.mcPlus1`, `.tPlus1`, `.lPlus1`
 *
 * A placeholder whose data is missing (absent field, empty or out-of-range
 * list item) resolves to `[NAME non défini]`; an unknown placeholder resolves
 * to `[NAME non résolu]`. Missing data never throws and never leaks the
 * literal text "undefined" into the output.
 *
 * @param {?Array<string>} variablesMatch - Placeholders including their braces,
 *   e.g. `['{{T0}}', '{{MC+1_2}}']`; may be null/empty.
 * @param {?Object} csvData - Row data holding the fields listed above.
 * @returns {string} Concatenated resolved content, or '' when there is nothing to resolve.
 */
function resolveVariablesContent(variablesMatch, csvData) {
  if (!variablesMatch || variablesMatch.length === 0) {
    return '';
  }

  const data = csvData || {};

  // Placeholders that map one-to-one onto a csvData field.
  const scalarFields = new Map([
    ['T0', 't0'],
    ['MC0', 'mc0'],
    ['T-1', 'tMinus1'],
    ['L-1', 'lMinus1'],
  ]);

  // Placeholder prefixes backed by a comma-separated csvData field.
  const listFields = [
    ['MC+1_', 'mcPlus1'],
    ['T+1_', 'tPlus1'],
    ['L+1_', 'lPlus1'],
  ];

  const resolveOne = (variable) => {
    // Strip the braces; tolerate padding such as "{{ T0 }}".
    const cleanVar = String(variable).replace(/[{}]/g, '').trim();
    const undefinedMarker = `[${cleanVar} non défini]`;

    if (scalarFields.has(cleanVar)) {
      const value = data[scalarFields.get(cleanVar)];
      return value == null ? undefinedMarker : String(value);
    }

    const listEntry = listFields.find(([prefix]) => cleanVar.startsWith(prefix));
    if (!listEntry) {
      return `[${cleanVar} non résolu]`;
    }

    const rawList = data[listEntry[1]];
    if (rawList == null) {
      return undefinedMarker;
    }

    // The suffix after the first underscore is a 1-based position in the list.
    const position = Number.parseInt(cleanVar.split('_')[1], 10) - 1;
    const items = String(rawList).split(',').map((item) => item.trim());
    return items[position] || undefinedMarker;
  };

  return variablesMatch.map((variable) => resolveOne(variable)).join('');
}

// ============= CLASSIFICATION ÉLÉMENTS - IDENTIQUE =============

/**
 * Classifies an element by the first known marker contained in its name.
 *
 * Markers are checked in the order listed below; the first one found in
 * `name` decides the type.
 *
 * @param {string} name - Tag name, e.g. "Titre_H3_3".
 * @returns {string} The matching type label, or 'autre' when no marker is found.
 */
function getElementType(name) {
  const markerToType = [
    ['Titre_H1', 'titre_h1'],
    ['Titre_H2', 'titre_h2'],
    ['Titre_H3', 'titre_h3'],
    ['Intro_', 'intro'],
    ['Txt_', 'texte'],
    ['Faq_q', 'faq_question'],
    ['Faq_a', 'faq_reponse'],
    ['Faq_H3', 'faq_titre'],
  ];

  for (const [marker, typeLabel] of markerToType) {
    if (name.includes(marker)) {
      return typeLabel;
    }
  }

  return 'autre';
}

// ============= GÉNÉRATION SÉQUENTIELLE - ADAPTÉE =============

/**
 * Generates content for every element, one after the other.
 *
 * For each element a prompt is built with `createPromptForElement` and sent
 * through `callLLM` (loaded from './LLMManager'). A failure on one element is
 * logged and replaced by an error placeholder; the remaining elements are
 * still processed.
 *
 * @param {Array<Object>} elements - Element descriptors (uses `name` and `originalTag`).
 * @param {Object} csvData - Row data; `csvData.personality` is forwarded to `callLLM`.
 * @param {string} xmlTemplate - Not used by this function.
 * @returns {Promise<Object>} Map of `originalTag` to generated content or error placeholder.
 */
async function generateAllContent(elements, csvData, xmlTemplate) {
  await logSh(`Début génération pour ${elements.length} éléments`, 'INFO');

  // Produces the content for one element, or the error placeholder on failure.
  const produceFor = async (element, position) => {
    try {
      await logSh(`Élément ${position + 1}/${elements.length}: ${element.name}`, 'DEBUG');

      const prompt = createPromptForElement(element, csvData);
      await logSh(`Prompt créé: ${prompt}`, 'DEBUG');

      // Loaded lazily, inside the guarded section, so a load failure is
      // reported per element like any other generation error.
      const { callLLM } = require('./LLMManager');
      const content = await callLLM('openai', prompt, {}, csvData.personality);

      await logSh(`Contenu reçu: ${content}`, 'DEBUG');
      return content;
    } catch (failure) {
      await logSh(`ERREUR élément ${element.name}: ${failure.toString()}`, 'ERROR');
      return `[Erreur génération: ${element.name}]`;
    }
  };

  const generatedContent = {};
  let position = 0;
  while (position < elements.length) {
    const element = elements[position];
    const outcome = await produceFor(element, position);
    generatedContent[element.originalTag] = outcome;
    position += 1;
  }

  const producedCount = Object.keys(generatedContent).length;
  await logSh(`Génération terminée. ${producedCount} éléments`, 'INFO');
  return generatedContent;
}

// ============= PARSING STRUCTURE - IDENTIQUE =============

/**
 * Splits an element name into its structural parts.
 *
 * The name is first stripped of `<strong>` / `</strong>` tags, `{{...}}`
 * groups and `{...}` groups, then split on underscores: the first segment is
 * the type, the second the level, and the remaining segments are converted
 * with `Number` to form the indices.
 *
 * @param {Object} element - Element descriptor; reads `name`, `variables`, `instructions`.
 * @returns {{type: string, level: (string|undefined), indices: Array<number>,
 *   hierarchyPath: string, originalElement: Object, variables: Array,
 *   instructions: *}} Structure description; `hierarchyPath` is every segment
 *   after the type re-joined with underscores.
 */
function parseElementStructure(element) {
  // Applied in this order: markup first, then double braces, then single braces.
  const noisePatterns = [
    /<\/?strong>/g,
    /\{\{[^}]*\}\}/g,
    /\{[^}]*\}/g,
  ];

  let strippedName = element.name;
  for (const pattern of noisePatterns) {
    strippedName = strippedName.replace(pattern, '');
  }

  const segments = strippedName.split('_');
  const afterType = segments.slice(1);

  return {
    type: segments[0],
    level: segments[1],
    indices: segments.slice(2).map(Number),
    hierarchyPath: afterType.join('_'),
    originalElement: element,
    variables: element.variables || [],
    instructions: element.instructions,
  };
}

// ============= HIÉRARCHIE INTELLIGENTE - ADAPTÉE =============

/**
 * Groups elements into sections keyed by their hierarchy path.
 *
 * Each element is parsed with `parseElementStructure`; elements sharing the
 * same `hierarchyPath` land in the same section. Within a section, a 'Titre'
 * element fills `title`, a 'Txt' or 'Intro' element fills `text`, and 'Faq'
 * elements are appended to `questions`. Other types only create the section.
 * A one-line summary of the result is logged at DEBUG level.
 *
 * @param {Array<Object>} elements - Element descriptors.
 * @returns {Promise<Object>} Map of hierarchy path to
 *   `{ title, text, questions, children }`.
 */
async function buildSmartHierarchy(elements) {
  const hierarchy = elements.reduce((sections, element) => {
    const structure = parseElementStructure(element);
    const key = structure.hierarchyPath;

    if (!sections[key]) {
      sections[key] = { title: null, text: null, questions: [], children: {} };
    }
    const section = sections[key];

    // The whole structure object is stored, so variables and instructions
    // stay available to consumers of the hierarchy.
    switch (structure.type) {
      case 'Titre':
        section.title = structure;
        break;
      case 'Txt':
      case 'Intro':
        section.text = structure;
        break;
      case 'Faq':
        section.questions.push(structure);
        break;
      default:
        break;
    }

    return sections;
  }, {});

  // Compact single-line overview of what was found for each path.
  const mappingSummary = Object.entries(hierarchy)
    .map(([path, section]) => {
      const titleMark = section.title ? '✓' : '✗';
      const textMark = section.text ? '✓' : '✗';
      return `${path}:[T:${titleMark} Txt:${textMark} FAQ:${section.questions.length}]`;
    })
    .join(' | ');

  await logSh(`Correspondances: ${mappingSummary}`, 'DEBUG');

  return hierarchy;
}

// ============= PARSERS RÉPONSES - ADAPTÉS =============

/**
 * Parses a response made of `[TAG]` header lines, each followed by its title.
 *
 * Every parsed title has leading '#' markers and HTML-like tags removed and
 * is stored under the key `|TAG|`. When no `[TAG]` block is found at all, the
 * non-blank lines of the response are assigned by position to the entries of
 * `allTitles` instead (keyed by each entry's `tag`).
 *
 * @param {string} response - Raw response text.
 * @param {Array<Object>} allTitles - Expected titles; only `tag` is read, and
 *   only in the positional fallback.
 * @returns {Promise<Object>} Map of tag key to cleaned title text.
 */
async function parseTitlesResponse(response, allTitles) {
  const parsed = {};
  const blockPattern = /\[([^\]]+)\]\s*\n([^[]*?)(?=\n\[|$)/gs;

  for (
    let hit = blockPattern.exec(response);
    hit !== null;
    hit = blockPattern.exec(response)
  ) {
    const label = hit[1].trim();

    // Remove leading '#' markers, then anything shaped like an HTML tag.
    const title = hit[2]
      .trim()
      .replace(/^#+\s*/, '')
      .replace(/<\/?[^>]+(>|$)/g, "");

    parsed[`|${label}|`] = title;

    await logSh(`✓ Titre parsé [${label}]: "${title}"`, 'DEBUG');
  }

  if (Object.keys(parsed).length > 0) {
    return parsed;
  }

  // No tagged block was recognised: fall back to matching lines by position.
  await logSh('Parsing titres échoué, fallback ligne par ligne', 'WARNING');
  const nonBlankLines = response.split('\n').filter((line) => line.trim());

  allTitles.forEach((titleInfo, position) => {
    const candidate = nonBlankLines[position];
    if (candidate) {
      parsed[titleInfo.tag] = candidate.trim();
    }
  });

  return parsed;
}
|
|
|
|
/**
 * Parses a response made of `[TAG]` header lines, each followed by its text.
 *
 * Every parsed text has leading '#' markers and HTML-like tags removed and is
 * stored under the key `|TAG|`. When no `[TAG]` block is found at all, the
 * non-empty lines that do not start with '[' are assigned by position to the
 * entries of `allTexts` (keyed by each entry's `tag`); entries without a line
 * receive a `[Texte manquant N]` placeholder.
 *
 * @param {string} response - Raw response text.
 * @param {Array<Object>} allTexts - Expected texts; only `tag` is read, and
 *   only in the positional fallback.
 * @returns {Promise<Object>} Map of tag key to cleaned text.
 */
async function parseTextsResponse(response, allTexts) {
  const parsed = {};

  await logSh('Parsing réponse textes avec vrais tags...', 'DEBUG');

  const blockPattern = /\[([^\]]+)\]\s*\n([^[]*?)(?=\n\[|$)/gs;

  for (
    let hit = blockPattern.exec(response);
    hit !== null;
    hit = blockPattern.exec(response)
  ) {
    const label = hit[1].trim();
    const body = hit[2]
      .trim()
      .replace(/^#+\s*/, '')
      .replace(/<\/?[^>]+(>|$)/g, "");

    parsed[`|${label}|`] = body;

    await logSh(`✓ Texte parsé [${label}]: "${body}"`, 'DEBUG');
  }

  if (Object.keys(parsed).length !== 0) {
    return parsed;
  }

  // No tagged block was recognised: map the remaining lines by position.
  await logSh('Parsing textes échoué, fallback ligne par ligne', 'WARNING');

  const usableLines = [];
  for (const rawLine of response.split('\n')) {
    const line = rawLine.trim();
    if (line.length > 0 && !line.startsWith('[')) {
      usableLines.push(line);
    }
  }

  let position = 0;
  while (position < allTexts.length) {
    const textInfo = allTexts[position];

    if (position >= usableLines.length) {
      await logSh(`✗ Pas assez de lignes pour ${textInfo.tag}`, 'WARNING');
      parsed[textInfo.tag] = `[Texte manquant ${position + 1}]`;
    } else {
      // Drop a leading list number such as "1. " when present.
      const content = usableLines[position].replace(/^\d+\.\s*/, '');
      parsed[textInfo.tag] = content;

      await logSh(`✓ Texte fallback ${position + 1} → ${textInfo.tag}: "${content}"`, 'DEBUG');
    }

    position += 1;
  }

  return parsed;
}

// ============= PARSER FAQ SPÉCIALISÉ - ADAPTÉ =============

/**
 * Parses a response containing FAQ questions and answers as `[TAG]` blocks
 * and verifies that every expected pair is complete.
 *
 * Each block's content has leading '#' markers and HTML-like tags removed and
 * is stored under the key `|TAG|`. A pair counts as complete when both
 * `pair.question.tag` and `pair.answer.tag` map to a non-empty value.
 *
 * @param {string} response - Raw response text.
 * @param {Array<Object>} faqPairs - Expected pairs; reads `number`,
 *   `question.tag` and `answer.tag`.
 * @returns {Promise<Object>} Map of tag key to cleaned content.
 * @throws {Error} When at least one expected pair is incomplete.
 */
async function parseFAQPairsResponse(response, faqPairs) {
  await logSh('Parsing réponse paires FAQ...', 'DEBUG');

  const blockPattern = /\[([^\]]+)\]\s*\n([^[]*?)(?=\n\[|$)/gs;
  const parsedItems = {};

  for (
    let hit = blockPattern.exec(response);
    hit !== null;
    hit = blockPattern.exec(response)
  ) {
    const label = hit[1].trim();
    const body = hit[2]
      .trim()
      .replace(/^#+\s*/, '')
      .replace(/<\/?[^>]+(>|$)/g, "");

    parsedItems[label] = body;

    await logSh(`✓ Item FAQ parsé [${label}]: "${body}"`, 'DEBUG');
  }

  // Re-key the parsed items with the pipe-delimited form used by the tags.
  const results = {};
  for (const [label, body] of Object.entries(parsedItems)) {
    results[`|${label}|`] = body;
  }

  // Consistency check: every pair needs both its question and its answer.
  let completeCount = 0;
  for (const pair of faqPairs) {
    const questionText = results[pair.question.tag];
    const answerText = results[pair.answer.tag];

    if (!questionText || !answerText) {
      await logSh(
        `⚠ Paire FAQ ${pair.number} incomplète: Q=${!!questionText} R=${!!answerText}`,
        'WARNING'
      );
      continue;
    }

    completeCount += 1;
    await logSh(`✓ Paire FAQ ${pair.number} complète: Q+R`, 'DEBUG');
  }

  await logSh(`${completeCount}/${faqPairs.length} paires FAQ complètes`, 'INFO');

  // Any missing pair is fatal for the caller.
  const manquantes = faqPairs.length - completeCount;
  if (manquantes > 0) {
    await logSh(`❌ FATAL: ${manquantes} paires FAQ manquantes sur ${faqPairs.length}`, 'ERROR');
    throw new Error(`FATAL: Génération FAQ incomplète (${manquantes}/${faqPairs.length} manquantes) - arrêt du workflow`);
  }

  return results;
}
|
|
|
|
/**
 * Parses a response made of `[TAG]` blocks for the remaining element kinds.
 *
 * Each block's content has leading '#' markers and HTML-like tags removed and
 * is stored under the key `|TAG|`. When fewer entries were parsed than
 * `allOtherElements` contains, every element still without a value receives
 * the non-empty, non-'['-prefixed response line found at the element's own
 * position, if there is one.
 *
 * @param {string} response - Raw response text.
 * @param {Array<Object>} allOtherElements - Expected elements; only `tag` is read.
 * @returns {Promise<Object>} Map of tag key to content.
 */
async function parseOtherElementsResponse(response, allOtherElements) {
  const parsed = {};

  await logSh('Parsing réponse autres éléments...', 'DEBUG');

  const blockPattern = /\[([^\]]+)\]\s*\n([^[]*?)(?=\n\[|$)/gs;

  for (
    let hit = blockPattern.exec(response);
    hit !== null;
    hit = blockPattern.exec(response)
  ) {
    const label = hit[1].trim();
    const body = hit[2]
      .trim()
      .replace(/^#+\s*/, '')
      .replace(/<\/?[^>]+(>|$)/g, "");

    parsed[`|${label}|`] = body;

    await logSh(`✓ Autre élément parsé [${label}]: "${body}"`, 'DEBUG');
  }

  const parsedCount = Object.keys(parsed).length;
  if (parsedCount >= allOtherElements.length) {
    return parsed;
  }

  // Partial parse: fill the gaps from the plain lines, matched by position.
  await logSh('Parsing autres éléments partiel, complétion fallback', 'WARNING');

  const usableLines = [];
  for (const rawLine of response.split('\n')) {
    const line = rawLine.trim();
    if (line.length > 0 && !line.startsWith('[')) {
      usableLines.push(line);
    }
  }

  allOtherElements.forEach((element, position) => {
    const alreadyFilled = Boolean(parsed[element.tag]);
    const candidate = usableLines[position];
    if (!alreadyFilled && candidate) {
      parsed[element.tag] = candidate;
    }
  });

  return parsed;
}

// ============= HELPER FUNCTIONS - ADAPTÉES =============

/**
 * Builds the basic generation prompt for one element.
 *
 * Placeholder implementation: the original note says the real version will
 * probably live in content-generation.js.
 *
 * @param {Object} element - Element descriptor; reads `type` and `resolvedContent`.
 * @param {Object} csvData - Not used by this implementation.
 * @returns {string} Prompt text naming the element type and its resolved content.
 */
function createPromptForElement(element, csvData) {
  const { type, resolvedContent } = element;
  return `Génère du contenu pour ${type}: ${resolvedContent}`;
}

// 🔄 NODE.JS EXPORTS
module.exports = {
|
|
extractElements,
|
|
resolveVariablesContent,
|
|
getElementType,
|
|
generateAllContent,
|
|
parseElementStructure,
|
|
buildSmartHierarchy,
|
|
parseTitlesResponse,
|
|
parseTextsResponse,
|
|
parseFAQPairsResponse,
|
|
parseOtherElementsResponse,
|
|
createPromptForElement
|
|
}; |