seo-generator-server/lib/pipeline/PipelineExecutor.js
StillHammer 3751ab047b feat(keywords): Add hierarchical context to missing keywords prompt and fix LLM response format
This commit improves keyword generation by providing hierarchical context for each element and fixing the LLM response format parsing.

Changes:
1. lib/MissingKeywords.js:
   - Add buildHierarchicalContext() to generate compact contextual info for each element
   - Display hierarchy in prompt (e.g., "H2 existants: 'Titre1', 'Titre2'")
   - For Txt elements: show associated MC keyword + parent title
   - For FAQ elements: count existing FAQs
   - Fix LLM response format by providing 3 concrete examples from actual list
   - Add explicit warning to use exact tag names [Titre_H2_3], [Txt_H2_6]
   - Improve getElementContext() to better retrieve hierarchical elements

2. lib/selective-enhancement/SelectiveUtils.js:
   - Fix createTypedPrompt() to use specific keyword from resolvedContent
   - Remove fallback to csvData.mc0 (log error if no specific keyword)

3. lib/pipeline/PipelineExecutor.js:
   - Integrate generateMissingSheetVariables() as "Étape 0" before extraction

Prompt format now:
  1. [Titre_H2_3] (titre) — H2 existants: "Titre1", "Titre2"
  2. [Txt_H2_6] (texte) — MC: "Plaque dibond" | Parent: "Guide dibond"
  3. [Faq_q_1] (question) — 3 FAQ existantes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 14:51:01 +08:00

560 lines
19 KiB
JavaScript

/**
* PipelineExecutor.js
*
* Moteur d'exécution des pipelines modulaires flexibles.
* Orchestre l'exécution séquentielle des modules avec gestion d'état.
*/
const { logSh } = require('../ErrorReporting');
const { tracer } = require('../trace');
const { PipelineDefinition } = require('./PipelineDefinition');
const { getPersonalities, readInstructionsData, selectPersonalityWithAI } = require('../BrainConfig');
const { extractElements, buildSmartHierarchy } = require('../ElementExtraction');
const { generateMissingKeywords, generateMissingSheetVariables } = require('../MissingKeywords');
const { injectGeneratedContent } = require('../ContentAssembly');
const { saveGeneratedArticleOrganic } = require('../ArticleStorage');
// Modules d'exécution
const { generateSimple } = require('../selective-enhancement/SelectiveUtils');
const { applySelectiveLayer } = require('../selective-enhancement/SelectiveCore');
const { applyPredefinedStack: applySelectiveStack } = require('../selective-enhancement/SelectiveLayers');
const { applyAdversarialLayer } = require('../adversarial-generation/AdversarialCore');
const { applyPredefinedStack: applyAdversarialStack } = require('../adversarial-generation/AdversarialLayers');
const { applyHumanSimulationLayer } = require('../human-simulation/HumanSimulationCore');
const { applyPredefinedSimulation } = require('../human-simulation/HumanSimulationLayers');
const { applyPatternBreakingLayer } = require('../pattern-breaking/PatternBreakingCore');
const { applyPatternBreakingStack } = require('../pattern-breaking/PatternBreakingLayers');
/**
* Classe PipelineExecutor
*/
/**
 * Execution engine for modular pipelines.
 *
 * Runs the enabled steps of a pipeline configuration one after another,
 * keeping the latest generated content, a per-step execution log, optional
 * checkpoints and (when requested) a history of saved intermediate versions.
 */
class PipelineExecutor {
  constructor() {
    this.currentContent = null;
    this.executionLog = [];
    this.checkpoints = [];
    this.versionHistory = []; // entries pushed by saveStepVersion()
    this.parentArticleId = null; // articleId of the first saved version
    this.csvData = null; // row data kept for saveStepVersion()
    this.finalElements = null; // elements stored by runGeneration()
    this.metadata = {
      startTime: null,
      endTime: null,
      totalDuration: 0,
      personality: null
    };
  }

  /**
   * Runs a complete pipeline.
   *
   * @param {object} pipelineConfig - Pipeline definition; reads `name` and `pipeline` (array of steps).
   * @param {*} rowNumber - Row identifier forwarded to loadData().
   * @param {object} [options] - `saveIntermediateSteps` saves a version after each step;
   *   `stopOnError === false` keeps running after a failed step.
   * @returns {Promise<object>} Summary with finalContent, executionLog, checkpoints,
   *   versionHistory and metadata.
   * @throws {Error} When validation fails, or when a step fails and
   *   `options.stopOnError` is not `false`.
   */
  async execute(pipelineConfig, rowNumber, options = {}) {
    return tracer.run('PipelineExecutor.execute', async () => {
      logSh(`🚀 Démarrage pipeline "${pipelineConfig.name}" (${pipelineConfig.pipeline.length} étapes)`, 'INFO');

      // Validation
      const validation = PipelineDefinition.validate(pipelineConfig);
      if (!validation.valid) {
        throw new Error(`Pipeline invalide: ${validation.errors.join(', ')}`);
      }

      // NOTE(review): currentContent and finalElements are not cleared here, so a
      // reused executor skips generation (see the guard in runGeneration) unless
      // reset() is called first — confirm this is intended.
      this.metadata.startTime = Date.now();
      this.executionLog = [];
      this.checkpoints = [];
      this.versionHistory = [];
      this.parentArticleId = null;

      // Load row data; kept on the instance for saveStepVersion()
      const csvData = await this.loadData(rowNumber);
      this.csvData = csvData;

      // Steps are enabled unless explicitly flagged `enabled: false`
      const enabledSteps = pipelineConfig.pipeline.filter(s => s.enabled !== false);

      for (let i = 0; i < enabledSteps.length; i++) {
        const step = enabledSteps[i];
        try {
          logSh(`▶ Étape ${step.step}/${pipelineConfig.pipeline.length}: ${step.module} (${step.mode})`, 'INFO');

          const stepStartTime = Date.now();
          const result = await this.executeStep(step, csvData, options);
          const stepDuration = Date.now() - stepStartTime;

          // Record the successful step
          this.executionLog.push({
            step: step.step,
            module: step.module,
            mode: step.mode,
            intensity: step.intensity,
            duration: stepDuration,
            modifications: result.modifications || 0,
            success: true,
            timestamp: new Date().toISOString()
          });

          // Only replace the current content when the step produced some
          if (result.content) {
            this.currentContent = result.content;
          }

          // Checkpoint when requested by the step
          if (step.saveCheckpoint) {
            this.checkpoints.push({
              step: step.step,
              content: this.currentContent,
              timestamp: new Date().toISOString()
            });
            logSh(`💾 Checkpoint sauvegardé (étape ${step.step})`, 'DEBUG');
          }

          // Optional save of the intermediate version
          if (options.saveIntermediateSteps && this.currentContent) {
            await this.saveStepVersion(step, result.modifications || 0, pipelineConfig.name);
          }

          logSh(`✔ Étape ${step.step} terminée (${stepDuration}ms, ${result.modifications || 0} modifs)`, 'INFO');
        } catch (error) {
          logSh(`✖ Erreur étape ${step.step}: ${error.message}`, 'ERROR');
          this.executionLog.push({
            step: step.step,
            module: step.module,
            mode: step.mode,
            success: false,
            error: error.message,
            timestamp: new Date().toISOString()
          });

          // Propagate unless the caller explicitly opted out with stopOnError: false
          if (options.stopOnError !== false) {
            throw error;
          }
        }
      }

      this.metadata.endTime = Date.now();
      this.metadata.totalDuration = this.metadata.endTime - this.metadata.startTime;
      logSh(`✅ Pipeline terminé: ${this.metadata.totalDuration}ms`, 'INFO');

      return {
        success: true,
        finalContent: this.currentContent,
        executionLog: this.executionLog,
        checkpoints: this.checkpoints,
        versionHistory: this.versionHistory,
        metadata: {
          ...this.metadata,
          pipelineName: pipelineConfig.name,
          totalSteps: enabledSteps.length,
          successfulSteps: this.executionLog.filter(l => l.success).length
        }
      };
    }, { pipelineName: pipelineConfig.name, rowNumber });
  }

  /**
   * Loads the row data and attaches the selected personality to it.
   *
   * @param {*} rowNumber - Row identifier passed to readInstructionsData().
   * @returns {Promise<object>} The loaded data with a `personality` property added.
   */
  async loadData(rowNumber) {
    return tracer.run('PipelineExecutor.loadData', async () => {
      const csvData = await readInstructionsData(rowNumber);

      // Pick a personality from the available ones using mc0 / t0 of the row
      const personalities = await getPersonalities();
      const personality = await selectPersonalityWithAI(
        csvData.mc0,
        csvData.t0,
        personalities
      );

      csvData.personality = personality;
      this.metadata.personality = personality.nom;
      logSh(`📊 Données chargées: ${csvData.mc0}, personnalité: ${personality.nom}`, 'DEBUG');
      return csvData;
    }, { rowNumber });
  }

  /**
   * Dispatches a single step to the runner matching `step.module`.
   *
   * @param {object} step - Step definition (`module`, `mode`, `step`, ...).
   * @param {object} csvData - Row data returned by loadData().
   * @param {object} options - Execution options (not used by the dispatch itself).
   * @returns {Promise<{content: *, modifications: number}>}
   * @throws {Error} When `step.module` is not a known module.
   */
  async executeStep(step, csvData, options) {
    return tracer.run(`PipelineExecutor.executeStep.${step.module}`, async () => {
      switch (step.module) {
        case 'generation':
          return await this.runGeneration(step, csvData);
        case 'selective':
          return await this.runSelective(step, csvData);
        case 'adversarial':
          return await this.runAdversarial(step, csvData);
        case 'human':
          return await this.runHumanSimulation(step, csvData);
        case 'pattern':
          return await this.runPatternBreaking(step, csvData);
        default:
          throw new Error(`Module inconnu: ${step.module}`);
      }
    }, { step: step.step, module: step.module, mode: step.mode });
  }

  /**
   * Runs the initial generation. Skipped when content already exists.
   *
   * Mutates `csvData` in place with the values returned by
   * generateMissingSheetVariables() so later steps see them.
   *
   * @param {object} step - Step definition; `parameters.llmProvider` overrides the default provider.
   * @param {object} csvData - Row data; reads `xmlTemplate`.
   * @returns {Promise<{content: *, modifications: number}>}
   */
  async runGeneration(step, csvData) {
    return tracer.run('PipelineExecutor.runGeneration', async () => {
      if (this.currentContent) {
        logSh('⚠️ Contenu déjà généré, génération ignorée', 'WARN');
        return { content: this.currentContent, modifications: 0 };
      }

      // Step 0: fill in missing sheet variables before extraction
      logSh('🔄 Vérification variables Google Sheets...', 'DEBUG');
      const updatedCsvData = await generateMissingSheetVariables(csvData.xmlTemplate, csvData);
      // Merge into the caller's object so following steps use the completed data
      Object.assign(csvData, updatedCsvData);

      // Step 1: extract elements from the XML template
      const elements = await extractElements(csvData.xmlTemplate, csvData);
      logSh(`✓ Extraction: ${elements.length} éléments extraits`, 'DEBUG');

      // Step 2: generate missing keywords; result kept for saveStepVersion()
      const finalElements = await generateMissingKeywords(elements, csvData);
      this.finalElements = finalElements;

      // Step 3: build the hierarchy; accepts either an array or a keyed object
      const elementsArray = Array.isArray(finalElements) ? finalElements :
        (finalElements && typeof finalElements === 'object') ? Object.values(finalElements) : [];
      const hierarchy = await buildSmartHierarchy(elementsArray);
      logSh(`✓ Hiérarchie: ${Object.keys(hierarchy).length} sections`, 'DEBUG');

      // Step 4: simple generation with a configurable LLM provider
      const llmProvider = step.parameters?.llmProvider || 'claude-sonnet-4-5';
      const result = await generateSimple(hierarchy, csvData, { llmProvider });
      logSh(`✓ Génération: ${Object.keys(result.content || {}).length} éléments créés avec ${llmProvider}`, 'DEBUG');

      return {
        content: result.content,
        modifications: Object.keys(result.content || {}).length
      };
    }, { mode: step.mode });
  }

  /**
   * Runs selective enhancement on the current content.
   *
   * @param {object} step - Step definition; `parameters` are spread last into the
   *   layer config and therefore override the computed defaults.
   * @param {object} csvData - Row data; reads `personality`.
   * @returns {Promise<{content: *, modifications: number}>}
   * @throws {Error} When no content has been generated yet.
   */
  async runSelective(step, csvData) {
    return tracer.run('PipelineExecutor.runSelective', async () => {
      if (!this.currentContent) {
        throw new Error('Aucun contenu à améliorer. Génération requise avant selective enhancement');
      }

      const llmProvider = step.parameters?.llmProvider || 'gpt-4o-mini';
      const config = {
        csvData,
        personality: csvData.personality,
        // NOTE(review): `||` also replaces an explicit 0 with 1.0 — confirm 0 is not a valid intensity
        intensity: step.intensity || 1.0,
        llmProvider: llmProvider,
        ...step.parameters
      };

      let result;
      // Predefined modes go through the stack API, anything else through the direct layer
      const predefinedStacks = ['lightEnhancement', 'standardEnhancement', 'fullEnhancement', 'personalityFocus', 'fluidityFocus', 'adaptive'];
      if (predefinedStacks.includes(step.mode)) {
        result = await applySelectiveStack(this.currentContent, step.mode, config);
      } else {
        result = await applySelectiveLayer(this.currentContent, config);
      }

      logSh(`✓ Selective: modifications appliquées avec ${llmProvider}`, 'DEBUG');
      return {
        // Falls back to the raw result when it has no (truthy) `content` property
        content: result.content || result,
        modifications: result.modifications || 0
      };
    }, { mode: step.mode, intensity: step.intensity });
  }

  /**
   * Runs adversarial generation on the current content.
   *
   * @param {object} step - Step definition; mode `'none'` is a no-op. Reads
   *   `parameters.llmProvider`, `parameters.detector`, `parameters.method`.
   * @param {object} csvData - Row data forwarded to the layer.
   * @returns {Promise<{content: *, modifications: number}>}
   * @throws {Error} When no content has been generated yet.
   */
  async runAdversarial(step, csvData) {
    return tracer.run('PipelineExecutor.runAdversarial', async () => {
      if (!this.currentContent) {
        throw new Error('Aucun contenu à traiter. Génération requise avant adversarial');
      }
      if (step.mode === 'none') {
        logSh('Adversarial mode = none, ignoré', 'DEBUG');
        return { content: this.currentContent, modifications: 0 };
      }

      const llmProvider = step.parameters?.llmProvider || 'gemini-pro';
      const config = {
        csvData,
        detectorTarget: step.parameters?.detector || 'general',
        method: step.parameters?.method || 'regeneration',
        // NOTE(review): `||` also replaces an explicit 0 with 1.0 — confirm 0 is not a valid intensity
        intensity: step.intensity || 1.0,
        llmProvider: llmProvider
      };

      let result;
      // Map user-facing mode names to the stack names used by the stack API
      const stackMapping = {
        'light': 'lightDefense',
        'standard': 'standardDefense',
        'heavy': 'heavyDefense',
        'adaptive': 'adaptive'
      };

      if (stackMapping[step.mode]) {
        const stackName = stackMapping[step.mode];
        if (stackName === 'adaptive') {
          // Adaptive mode uses the direct layer rather than a stack
          result = await applyAdversarialLayer(this.currentContent, config);
        } else {
          result = await applyAdversarialStack(this.currentContent, stackName, config);
        }
      } else {
        // Unmapped modes use the direct layer
        result = await applyAdversarialLayer(this.currentContent, config);
      }

      logSh(`✓ Adversarial: modifications appliquées avec ${llmProvider}`, 'DEBUG');
      return {
        content: result.content || result,
        modifications: result.modifications || 0
      };
    }, { mode: step.mode, detector: step.parameters?.detector });
  }

  /**
   * Runs human simulation on the current content.
   *
   * @param {object} step - Step definition; mode `'none'` is a no-op. Reads
   *   `parameters.llmProvider`, `parameters.fatigueLevel` (default 0.5) and
   *   `parameters.errorRate` (default 0.3).
   * @param {object} csvData - Row data; reads `personality`.
   * @returns {Promise<{content: *, modifications: number}>}
   * @throws {Error} When no content has been generated yet.
   */
  async runHumanSimulation(step, csvData) {
    return tracer.run('PipelineExecutor.runHumanSimulation', async () => {
      if (!this.currentContent) {
        throw new Error('Aucun contenu à traiter. Génération requise avant human simulation');
      }
      if (step.mode === 'none') {
        logSh('Human simulation mode = none, ignoré', 'DEBUG');
        return { content: this.currentContent, modifications: 0 };
      }

      const llmProvider = step.parameters?.llmProvider || 'mistral-small';
      const config = {
        csvData,
        personality: csvData.personality,
        // NOTE(review): `||` also replaces an explicit 0 with 1.0 — confirm 0 is not a valid intensity
        intensity: step.intensity || 1.0,
        // `??` (not `||`) so that an explicit 0 is forwarded instead of being
        // silently replaced by the default
        fatigueLevel: step.parameters?.fatigueLevel ?? 0.5,
        errorRate: step.parameters?.errorRate ?? 0.3,
        llmProvider: llmProvider
      };

      let result;
      // Predefined modes go through the predefined-simulation API, anything else through the direct layer
      const predefinedModes = ['lightSimulation', 'standardSimulation', 'heavySimulation', 'adaptiveSimulation', 'personalityFocus', 'temporalFocus'];
      if (predefinedModes.includes(step.mode)) {
        result = await applyPredefinedSimulation(this.currentContent, step.mode, config);
      } else {
        result = await applyHumanSimulationLayer(this.currentContent, config);
      }

      logSh(`✓ Human Simulation: modifications appliquées avec ${llmProvider}`, 'DEBUG');
      return {
        content: result.content || result,
        modifications: result.modifications || 0
      };
    }, { mode: step.mode, intensity: step.intensity });
  }

  /**
   * Runs pattern breaking on the current content.
   *
   * @param {object} step - Step definition; mode `'none'` is a no-op. Reads
   *   `parameters.llmProvider` and `parameters.focus` (default `'both'`).
   * @param {object} csvData - Row data; reads `personality`.
   * @returns {Promise<{content: *, modifications: number}>}
   * @throws {Error} When no content has been generated yet.
   */
  async runPatternBreaking(step, csvData) {
    return tracer.run('PipelineExecutor.runPatternBreaking', async () => {
      if (!this.currentContent) {
        throw new Error('Aucun contenu à traiter. Génération requise avant pattern breaking');
      }
      if (step.mode === 'none') {
        logSh('Pattern breaking mode = none, ignoré', 'DEBUG');
        return { content: this.currentContent, modifications: 0 };
      }

      const llmProvider = step.parameters?.llmProvider || 'deepseek-chat';
      const config = {
        csvData,
        personality: csvData.personality,
        // NOTE(review): `||` also replaces an explicit 0 with 1.0 — confirm 0 is not a valid intensity
        intensity: step.intensity || 1.0,
        focus: step.parameters?.focus || 'both',
        llmProvider: llmProvider
      };

      let result;
      const predefinedModes = ['lightPatternBreaking', 'standardPatternBreaking', 'heavyPatternBreaking', 'adaptivePatternBreaking', 'syntaxFocus', 'connectorsFocus'];
      if (predefinedModes.includes(step.mode)) {
        // NOTE(review): mode is passed first here, unlike the other stack helpers
        // (content first) — presumably matches this helper's signature; verify.
        result = await applyPatternBreakingStack(step.mode, this.currentContent, config);
      } else {
        result = await applyPatternBreakingLayer(this.currentContent, config);
      }

      logSh(`✓ Pattern Breaking: modifications appliquées avec ${llmProvider}`, 'DEBUG');
      return {
        content: result.content || result,
        modifications: result.modifications || 0
      };
    }, { mode: step.mode, intensity: step.intensity });
  }

  /**
   * @returns {*} The current content (null before any generation).
   */
  getCurrentContent() {
    return this.currentContent;
  }

  /**
   * @returns {Array<object>} The per-step execution log.
   */
  getExecutionLog() {
    return this.executionLog;
  }

  /**
   * @returns {Array<object>} The saved checkpoints.
   */
  getCheckpoints() {
    return this.checkpoints;
  }

  /**
   * @returns {object} The execution metadata (timings, personality).
   */
  getMetadata() {
    return this.metadata;
  }

  /**
   * Restores every piece of instance state to its constructor value.
   */
  reset() {
    this.currentContent = null;
    this.executionLog = [];
    this.checkpoints = [];
    this.versionHistory = [];
    this.parentArticleId = null;
    this.csvData = null;
    this.finalElements = null;
    this.metadata = {
      startTime: null,
      endTime: null,
      totalDuration: 0,
      personality: null
    };
  }

  /**
   * Saves the current content as an intermediate version and records it in
   * `versionHistory`. Best effort: failures are logged, never thrown, so a
   * storage problem does not interrupt the pipeline.
   *
   * @param {object} step - The step that just ran (`step`, `module`, `mode`).
   * @param {number} modifications - Number of modifications reported by the step.
   * @param {string} pipelineName - Pipeline name, used to build the `source` field.
   * @returns {Promise<void>}
   */
  async saveStepVersion(step, modifications, pipelineName) {
    try {
      if (!this.csvData || !this.finalElements) {
        logSh('⚠️ Données manquantes pour sauvegarde, ignorée', 'WARN');
        return;
      }

      // Version label is derived from the step number; stage from module and mode
      const versionNumber = `v1.${step.step}`;
      const stageName = `${step.module}_${step.mode}`;
      logSh(`💾 Sauvegarde ${versionNumber}: ${stageName}`, 'INFO');

      // A template that does not start with an XML declaration is treated as base64
      const xmlString = this.csvData.xmlTemplate.startsWith('<?xml')
        ? this.csvData.xmlTemplate
        : Buffer.from(this.csvData.xmlTemplate, 'base64').toString('utf8');
      // NOTE(review): the assembled result is not used below — presumably called
      // so that assembly errors abort the save; confirm.
      await injectGeneratedContent(xmlString, this.currentContent, this.finalElements);

      const storage = await saveGeneratedArticleOrganic(
        { generatedTexts: this.currentContent },
        this.csvData,
        {
          version: versionNumber,
          stage: stageName,
          source: `pipeline_${pipelineName}`,
          // The module identifies an adversarial step; `mode` carries its variant
          // (e.g. light/standard/heavy), so the test must be on `step.module`.
          adversarialMode: step.module === 'adversarial' ? step.mode : 'none',
          stageDescription: `${step.module} (${step.mode}) - ${modifications} modifications`,
          parentArticleId: this.parentArticleId,
          useVersionedSheet: true
        }
      );

      // The first saved version becomes the parent of the following ones
      if (!this.parentArticleId) {
        this.parentArticleId = storage.articleId;
      }

      this.versionHistory.push({
        version: versionNumber,
        stage: stageName,
        articleId: storage.articleId,
        length: storage.textLength,
        wordCount: storage.wordCount,
        modifications: modifications
      });
      logSh(` ✅ Sauvé ${versionNumber} - ID: ${storage.articleId}`, 'INFO');
    } catch (error) {
      logSh(`❌ Erreur sauvegarde version: ${error.message}`, 'ERROR');
      // Deliberately not rethrown: saving must not block pipeline execution
    }
  }
}
// CommonJS export: the executor class is exposed as a named property.
module.exports = { PipelineExecutor };