seogeneratorserver/lib/generation/InitialGeneration.js
StillHammer dbf1a3de8c Add technical plan for multi-format export system
Added plan.md with complete architecture for format-agnostic content generation:
- Support for Markdown, HTML, Plain Text, JSON formats
- New FormatExporter module with neutral data structure
- Integration strategy with existing ContentAssembly and ArticleStorage
- Bonus features: SEO metadata generation, readability scoring, WordPress Gutenberg format
- Implementation roadmap with 4 phases (6h total estimated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-18 16:14:29 +08:00

389 lines
11 KiB
JavaScript

// ========================================
// ÉTAPE 1: GÉNÉRATION INITIALE
// Responsabilité: Créer le contenu de base avec Claude uniquement
// LLM: Claude Sonnet (température 0.7)
// ========================================
const { callLLM } = require('../LLMManager');
const { logSh } = require('../ErrorReporting');
const { tracer } = require('../trace');
/**
 * MAIN ENTRY POINT - step 1/4: initial content generation.
 *
 * Flattens the section hierarchy into an element list, splits it into FAQ
 * question/answer pairs and all remaining elements, generates each group and
 * merges both result maps into one object.
 *
 * @param {Object} input
 * @param {Object} input.hierarchy - Sections keyed by path (the element count is
 *   taken from its keys).
 * @param {Object} input.csvData - Row data; `mc0` is read here, the whole object is
 *   forwarded to the generators.
 * @param {Object} [input.context] - May be supplied by callers; this step does not read it.
 * @returns {Promise<{content: Object, stats: Object, debug: Object}>} `content` is the
 *   merged result map, `stats` holds counts and the duration in ms, `debug` lists
 *   the generated keys.
 * @throws {Error} "InitialGeneration failed: <message>" when generation fails; the
 *   underlying error is attached as `cause`.
 */
async function generateInitialContent(input) {
  return await tracer.run('InitialGeneration.generateInitialContent()', async () => {
    const { hierarchy, csvData } = input;
    const elementsCount = Object.keys(hierarchy).length;

    await tracer.annotate({
      step: '1/4',
      llmProvider: 'claude',
      elementsCount,
      mc0: csvData.mc0
    });

    const startTime = Date.now();
    logSh(`🚀 ÉTAPE 1/4: Génération initiale (Claude)`, 'INFO');
    logSh(` 📊 ${elementsCount} éléments à générer`, 'INFO');

    try {
      // Flatten the hierarchy, then split FAQ pairs from everything else.
      const allElements = collectElementsInXMLOrder(hierarchy);
      const { faqPairs, otherElements } = separateElementTypes(allElements);

      const results = {};

      // 1. Regular elements (titles, texts, intro)
      if (otherElements.length > 0) {
        const normalResults = await generateNormalElements(otherElements, csvData);
        Object.assign(results, normalResults);
      }

      // 2. FAQ pairs, when present
      if (faqPairs.length > 0) {
        const faqResults = await generateFAQPairs(faqPairs, csvData);
        Object.assign(results, faqResults);
      }

      const duration = Date.now() - startTime;
      const generatedKeys = Object.keys(results);
      const stats = {
        processed: generatedKeys.length,
        generated: generatedKeys.length,
        faqPairs: faqPairs.length,
        duration
      };

      logSh(`✅ ÉTAPE 1/4 TERMINÉE: ${stats.generated} éléments générés (${duration}ms)`, 'INFO');
      await tracer.event(`Génération initiale terminée`, stats);

      return {
        content: results,
        stats,
        debug: {
          llmProvider: 'claude',
          step: 1,
          elementsGenerated: generatedKeys
        }
      };
    } catch (error) {
      const duration = Date.now() - startTime;
      logSh(`❌ ÉTAPE 1/4 ÉCHOUÉE après ${duration}ms: ${error.message}`, 'ERROR');
      // Keep the original error (and its stack) reachable from the wrapper.
      // Assigned as a property so it also works on runtimes without the
      // `new Error(msg, { cause })` option.
      const wrapped = new Error(`InitialGeneration failed: ${error.message}`);
      wrapped.cause = error;
      throw wrapped;
    }
  }, input);
}
/**
 * Generate the non-FAQ elements (titles, texts, intro) batch by batch.
 *
 * Elements are split into batches of 4; each batch is turned into one prompt,
 * sent to the 'claude' provider and parsed back into a tag -> content map.
 * Batches run sequentially with a 1500 ms pause between them.
 *
 * @param {Array<Object>} elements - Element descriptors to generate.
 * @param {Object} csvData - Row data; `personality` is forwarded to the LLM call.
 * @returns {Promise<Object>} Merged results of every batch.
 * @throws Re-throws the first batch failure after logging it.
 */
async function generateNormalElements(elements, csvData) {
  logSh(`📝 Génération éléments normaux: ${elements.length} éléments`, 'DEBUG');

  const batches = chunkArray(elements, 4); // batches of 4 to avoid timeouts
  const batchCount = batches.length;
  const merged = {};

  for (const [position, batch] of batches.entries()) {
    const ordinal = position + 1;
    logSh(` 📦 Chunk ${ordinal}/${batchCount}: ${batch.length} éléments`, 'DEBUG');

    try {
      const llmOptions = {
        temperature: 0.7,
        maxTokens: 2000 * batch.length
      };
      const answer = await callLLM('claude', createBatchPrompt(batch, csvData), llmOptions, csvData.personality);
      const parsed = parseBatchResponse(answer, batch);
      Object.assign(merged, parsed);
      logSh(` ✅ Chunk ${ordinal}: ${Object.keys(parsed).length} éléments générés`, 'DEBUG');

      // Pause between batches, but not after the last one.
      const isLastBatch = position >= batchCount - 1;
      if (!isLastBatch) {
        await sleep(1500);
      }
    } catch (error) {
      logSh(` ❌ Chunk ${ordinal} échoué: ${error.message}`, 'ERROR');
      throw error;
    }
  }

  return merged;
}
/**
 * Generate all FAQ question/answer pairs with a single LLM call.
 *
 * @param {Array<{question: Object, answer: Object}>} faqPairs - Pairs to generate.
 * @param {Object} csvData - Row data; `personality` is forwarded to the LLM call.
 * @returns {Promise<Object>} Map of original tag -> generated content, as produced
 *   by parseFAQResponse. Empty object when there is nothing to generate.
 */
async function generateFAQPairs(faqPairs, csvData) {
  logSh(`❓ Génération paires FAQ: ${faqPairs.length} paires`, 'DEBUG');

  // Nothing to generate: the prompt builder reads faqPairs[0] and would throw,
  // and an LLM call would be wasted anyway.
  if (faqPairs.length === 0) {
    return {};
  }

  const prompt = createFAQPairsPrompt(faqPairs, csvData);
  const response = await callLLM('claude', prompt, {
    temperature: 0.8,
    maxTokens: 3000
  }, csvData.personality);

  return parseFAQResponse(response, faqPairs);
}
/**
 * Build the prompt asking the LLM to write a batch of non-FAQ elements.
 *
 * The prompt lists every element as "N. [tag] - description" (tags with their
 * `|` characters removed), then the writer's style settings, the writing
 * guidelines and a FORMAT example built from the first two elements. When the
 * batch has a single element the second example tag is the literal "element2".
 *
 * @param {Array<Object>} elements - Non-empty batch; each item exposes `tag` and
 *   whatever getElementDescription reads.
 * @param {Object} csvData - Provides `mc0` and `personality` (nom, style,
 *   vocabulairePref, longueurPhrases, niveauTechnique).
 * @returns {string} The complete prompt text.
 */
function createBatchPrompt(elements, csvData) {
  const persona = csvData.personality;
  const bareTag = (rawTag) => rawTag.replace(/\|/g, '');

  const header = [
    '=== GÉNÉRATION CONTENU INITIAL ===',
    'Entreprise: Autocollant.fr - signalétique personnalisée',
    `Sujet: ${csvData.mc0}`,
    `Rédacteur: ${persona.nom} (${persona.style})`,
    'ÉLÉMENTS À GÉNÉRER:'
  ];

  const listing = elements.map((elementInfo, position) => {
    const label = bareTag(elementInfo.tag);
    return `${position + 1}. [${label}] - ${getElementDescription(elementInfo)}`;
  });

  const footer = [
    '', // blank line separating the element list from the style section
    `STYLE ${persona.nom.toUpperCase()}:`,
    `- Vocabulaire: ${persona.vocabulairePref}`,
    `- Phrases: ${persona.longueurPhrases}`,
    `- Niveau: ${persona.niveauTechnique}`,
    'CONSIGNES:',
    `- Contenu SEO optimisé pour ${csvData.mc0}`,
    `- Style ${persona.style} naturel`,
    '- Pas de références techniques dans contenu',
    '- RÉPONSE DIRECTE par le contenu',
    'FORMAT:',
    `[${bareTag(elements[0].tag)}]`,
    'Contenu généré...',
    `[${elements[1] ? bareTag(elements[1].tag) : 'element2'}]`,
    'Contenu généré...'
  ];

  return [...header, ...listing, ...footer].join('\n');
}
/**
 * Parse a batch LLM response made of "[tag]" headers followed by content.
 *
 * Every "[tag] content" section is collected (content cleaned through
 * cleanGeneratedContent), then each requested element is looked up by its tag
 * with the `|` characters removed. Content longer than 10 characters is kept;
 * otherwise a generic fallback sentence is used and a WARNING is logged.
 *
 * @param {string} response - Raw LLM output.
 * @param {Array<Object>} elements - Requested elements (`tag`, `element.name`).
 * @returns {Object} Map of original tag (pipes included) -> content; always has
 *   one entry per requested element.
 */
function parseBatchResponse(response, elements) {
  const sectionPattern = /\[([^\]]+)\]\s*([^[]*?)(?=\n\[|$)/gs;
  const contentByTag = {};

  for (let hit = sectionPattern.exec(response); hit !== null; hit = sectionPattern.exec(response)) {
    contentByTag[hit[1].trim()] = cleanGeneratedContent(hit[2].trim());
  }

  // Map parsed sections back onto the real tags.
  const mapped = {};
  for (const entry of elements) {
    const bareTag = entry.tag.replace(/\|/g, '');
    const candidate = contentByTag[bareTag];

    if (candidate && candidate.length > 10) {
      mapped[entry.tag] = candidate;
      continue;
    }

    mapped[entry.tag] = `Contenu professionnel pour ${entry.element.name || bareTag}`;
    logSh(`⚠️ Fallback pour [${bareTag}]`, 'WARNING');
  }

  return mapped;
}
/**
 * Build the prompt asking the LLM to write FAQ question/answer pairs.
 *
 * Each pair is listed as "N. [questionTag] + [answerTag]" (tags with their `|`
 * characters removed), followed by the guidelines and a FORMAT example built
 * from the first pair.
 *
 * @param {Array<{question: Object, answer: Object}>} faqPairs - Non-empty list of pairs.
 * @param {Object} csvData - Provides `mc0` and `personality` (nom, style).
 * @returns {string} The complete prompt text.
 */
function createFAQPairsPrompt(faqPairs, csvData) {
  const persona = csvData.personality;
  const bareTag = (rawTag) => rawTag.replace(/\|/g, '');

  const header = [
    '=== GÉNÉRATION PAIRES FAQ ===',
    `Sujet: ${csvData.mc0}`,
    `Rédacteur: ${persona.nom} (${persona.style})`,
    'PAIRES À GÉNÉRER:'
  ];

  const listing = faqPairs.map((pair, position) => {
    const questionLabel = bareTag(pair.question.tag);
    const answerLabel = bareTag(pair.answer.tag);
    return `${position + 1}. [${questionLabel}] + [${answerLabel}]`;
  });

  const footer = [
    '', // blank line separating the pair list from the guidelines
    'CONSIGNES:',
    '- Questions naturelles de clients',
    `- Réponses expertes ${persona.style}`,
    '- Couvrir: prix, livraison, personnalisation',
    'FORMAT:',
    `[${bareTag(faqPairs[0].question.tag)}]`,
    'Question client naturelle ?',
    `[${bareTag(faqPairs[0].answer.tag)}]`,
    'Réponse utile et rassurante.'
  ];

  return [...header, ...listing, ...footer].join('\n');
}
/**
 * Parse the FAQ LLM response made of "[tag]" headers followed by content.
 *
 * Every "[tag] content" section is collected (content cleaned through
 * cleanGeneratedContent), then each question and answer is looked up by its
 * tag with the `|` characters removed.
 *
 * Elements the response did not provide (or provided empty) are left out of
 * the result, and a WARNING is logged for each so that incomplete pairs are
 * visible instead of disappearing silently.
 *
 * @param {string} response - Raw LLM output.
 * @param {Array<{question: Object, answer: Object}>} faqPairs - Requested pairs.
 * @returns {Object} Map of original tag (pipes included) -> content, containing
 *   only the elements found in the response.
 */
function parseFAQResponse(response, faqPairs) {
  const sectionPattern = /\[([^\]]+)\]\s*([^[]*?)(?=\n\[|$)/gs;

  // A Map keeps lookups limited to tags actually present in the response;
  // a plain object would also answer for inherited keys such as "constructor".
  const parsedItems = new Map();
  let match;
  while ((match = sectionPattern.exec(response)) !== null) {
    const tag = match[1].trim();
    parsedItems.set(tag, cleanGeneratedContent(match[2].trim()));
  }

  const results = {};
  faqPairs.forEach(pair => {
    [pair.question, pair.answer].forEach(element => {
      const cleanTag = element.tag.replace(/\|/g, '');
      const content = parsedItems.get(cleanTag);
      if (content) {
        results[element.tag] = content;
      } else {
        logSh(`⚠️ FAQ manquante pour [${cleanTag}]`, 'WARNING');
      }
    });
  });

  return results;
}
// ============= HELPER FUNCTIONS =============
/**
 * Flatten the section hierarchy into a single list of element descriptors.
 *
 * Sections are visited in `Object.keys(hierarchy)` order; within a section the
 * title comes first, then the text, then each question. `title`, `text` and
 * `questions` are all optional.
 *
 * @param {Object} hierarchy - Sections keyed by path. Each present `title`,
 *   `text` and `questions[]` entry must carry an `originalElement` with
 *   `originalTag` and `type`.
 * @returns {Array<{tag: string, element: Object, type: string}>}
 */
function collectElementsInXMLOrder(hierarchy) {
  const toDescriptor = (node) => ({
    tag: node.originalElement.originalTag,
    element: node.originalElement,
    type: node.originalElement.type
  });

  const allElements = [];

  Object.keys(hierarchy).forEach(path => {
    const section = hierarchy[path];

    if (section.title) {
      allElements.push(toDescriptor(section.title));
    }
    if (section.text) {
      allElements.push(toDescriptor(section.text));
    }
    // A section without a `questions` list simply contributes no FAQ entries.
    (section.questions || []).forEach(q => {
      allElements.push(toDescriptor(q));
    });
  });

  return allElements;
}
/**
 * Split elements into FAQ question/answer pairs and everything else.
 *
 * FAQ questions (`type === 'faq_question'`) and answers (`type === 'faq_reponse'`)
 * are matched on the first run of digits found in their tag ('1' when the tag
 * has none). A question and an answer sharing a number form a pair; a question
 * without an answer, or an answer without a question, is returned with the
 * other elements so that it is still generated.
 *
 * @param {Array<{tag: string, type: string}>} allElements
 * @returns {{faqPairs: Array<{number: string, question: Object, answer: Object}>,
 *   otherElements: Array<Object>}}
 */
function separateElementTypes(allElements) {
  const faqNumberOf = (element) => {
    const numberMatch = element.tag.match(/(\d+)/);
    return numberMatch ? numberMatch[1] : '1';
  };
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  const faqPairs = [];
  const otherElements = [];
  const faqQuestions = {};
  const faqAnswers = {};

  // Bucket FAQ questions and answers by number; pass everything else through.
  allElements.forEach(element => {
    if (element.type === 'faq_question') {
      faqQuestions[faqNumberOf(element)] = element;
    } else if (element.type === 'faq_reponse') {
      faqAnswers[faqNumberOf(element)] = element;
    } else {
      otherElements.push(element);
    }
  });

  // Pair each question with its answer; unanswered questions become regular elements.
  Object.keys(faqQuestions).forEach(number => {
    const question = faqQuestions[number];
    if (hasOwn(faqAnswers, number)) {
      faqPairs.push({ number, question, answer: faqAnswers[number] });
    } else {
      otherElements.push(question);
    }
  });

  // Answers whose number matches no question would otherwise be dropped.
  Object.keys(faqAnswers).forEach(number => {
    if (!hasOwn(faqQuestions, number)) {
      otherElements.push(faqAnswers[number]);
    }
  });

  return { faqPairs, otherElements };
}
/**
 * Return the short description inserted next to an element in the batch prompt.
 *
 * @param {{type: string}} elementInfo - Element descriptor; only `type` is read.
 * @returns {string} The description for the known types (titre_h1, titre_h2,
 *   titre_h3, intro, texte), or a generic one for any other type.
 */
function getElementDescription(elementInfo) {
  const descriptionsByType = new Map([
    ['titre_h1', 'Titre principal accrocheur'],
    ['titre_h2', 'Titre de section'],
    ['titre_h3', 'Sous-titre'],
    ['intro', 'Introduction engageante'],
    ['texte', 'Paragraphe informatif']
  ]);

  const known = descriptionsByType.get(elementInfo.type);
  return known === undefined ? 'Contenu pertinent' : known;
}
/**
 * Strip unwanted artefacts from a generated text fragment.
 *
 * Falsy input (empty string, null, undefined) is returned unchanged. Otherwise:
 *  1. a leading "Titre_H<n>_<n>" / "Titre_U<n>_<n>" label, optionally preceded
 *     by "Bon," and/or "alors,", is removed together with trailing punctuation;
 *  2. every `**...**` span is removed, markers and inner text alike;
 *  3. runs of two or more whitespace characters (newlines included) collapse
 *     to a single space;
 *  4. the result is trimmed.
 *
 * NOTE(review): step 2 deletes the emphasised wording itself, not only the
 * asterisks - confirm this is intended.
 *
 * @param {string} content - Raw generated text.
 * @returns {string} Cleaned text (or the falsy input as-is).
 */
function cleanGeneratedContent(content) {
  if (!content) return content;

  return content
    .replace(/^(Bon,?\s*)?(alors,?\s*)?Titre_[HU]\d+_\d+[.,\s]*/gi, '')
    .replace(/\*\*[^*]+\*\*/g, '')
    .replace(/\s{2,}/g, ' ')
    .trim();
}
/**
 * Split an array into consecutive slices of at most `size` items.
 *
 * @param {Array} array - Items to split; not modified.
 * @param {number} size - Maximum slice length; must be greater than 0.
 * @returns {Array<Array>} The slices in order; empty when `array` is empty.
 * @throws {RangeError} When `size` is not a positive number. A size of 0 or a
 *   negative size would otherwise never advance the loop.
 */
function chunkArray(array, size) {
  // `!(size > 0)` also rejects NaN and undefined.
  if (!(size > 0)) {
    throw new RangeError(`chunkArray: size must be a positive number, got ${size}`);
  }

  const chunks = [];
  for (let i = 0; i < array.length; i += size) {
    chunks.push(array.slice(i, i + size));
  }
  return chunks;
}
/**
 * Return a promise that resolves after a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
// Public API of the initial-generation step. The other functions defined in
// this file (FAQ prompt/parse helpers, description lookup, content cleaning,
// chunking, sleep) are not exported.
module.exports = {
generateInitialContent, // ← MAIN ENTRY POINT
generateNormalElements,
generateFAQPairs,
createBatchPrompt,
parseBatchResponse,
collectElementsInXMLOrder,
separateElementTypes
};