seogeneratorserver/lib/human-simulation/HumanSimulationUtils.js
StillHammer dbf1a3de8c Add technical plan for multi-format export system
Added plan.md with complete architecture for format-agnostic content generation:
- Support for Markdown, HTML, Plain Text, JSON formats
- New FormatExporter module with neutral data structure
- Integration strategy with existing ContentAssembly and ArticleStorage
- Bonus features: SEO metadata generation, readability scoring, WordPress Gutenberg format
- Implementation roadmap with 4 phases (6h total estimated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-18 16:14:29 +08:00

401 lines
13 KiB
JavaScript

// ========================================
// FICHIER: HumanSimulationUtils.js
// RESPONSABILITÉ: Utilitaires partagés Human Simulation
// Fonctions d'analyse, validation et helpers
// ========================================
const { logSh } = require('../ErrorReporting');
/**
 * QUALITY THRESHOLDS
 * Score bands consulted by the validation helpers of this module.
 */
const QUALITY_THRESHOLDS = {
  // Readability floor: relaxed to 0.3 (was 0.6).
  readability: { minimum: 0.3, good: 0.6, excellent: 0.8 },

  // Keyword-preservation floor: relaxed to 0.7 (was 0.8).
  keywordPreservation: { minimum: 0.7, good: 0.9, excellent: 0.95 },

  // Similarity window: floor relaxed to 0.5 (was 0.7); ceiling raised to 1.0
  // (was 0.95) so that identical content is accepted as well.
  similarity: { minimum: 0.5, maximum: 1.0 }
};
/**
 * KEYWORDS THAT MUST ALWAYS BE PRESERVED
 * Flat list, grouped below by theme; order is significant only to consumers
 * that iterate it.
 */
const CRITICAL_KEYWORDS = [
  // Generic SEO keywords
  'plaque',
  'personnalisée',
  'gravure',
  'métal',
  'bois',
  'acrylique',
  'design',
  'qualité',
  'fabrication',
  'artisanal',
  'sur-mesure',

  // Important technical terms
  'laser',
  'CNC',
  'impression',
  'découpe',
  'finition',
  'traitement',

  // Commercial terms
  'prix',
  'tarif',
  'devis',
  'livraison',
  'garantie',
  'service'
];
/**
 * CONTENT COMPLEXITY ANALYSIS
 * Joins every string value of `content` into one text and derives word,
 * sentence and vocabulary metrics from it.
 *
 * Empty fragments produced by `split()` (trailing punctuation, surrounding
 * whitespace, empty content) are discarded before counting, and every ratio
 * falls back to 0 when its denominator is 0, so empty content yields an
 * all-zero, 'low' result instead of phantom words/sentences.
 *
 * @param {object} content - Map of element key -> text; non-string values are ignored
 * @returns {object} - Complexity metrics: totalWords, totalSentences,
 *   totalParagraphs, avgSentenceLength, lexicalDiversity, syntacticComplexity,
 *   complexityScore and level ('low' | 'medium' | 'high')
 */
function analyzeContentComplexity(content) {
  logSh('🔍 Analyse complexité contenu', 'DEBUG');

  const contentArray = Object.values(content || {}).filter(c => typeof c === 'string');
  const totalText = contentArray.join(' ');

  // Base metrics (empty split fragments are not words or sentences)
  const words = totalText.split(/\s+/).filter(Boolean);
  const sentences = totalText.split(/[.!?]+/).filter(s => s.trim().length > 0);
  const totalWords = words.length;
  const totalSentences = sentences.length;
  const totalParagraphs = contentArray.length;

  // Lexical diversity: share of distinct (case-insensitive) words
  const uniqueWords = new Set(words.map(w => w.toLowerCase())).size;
  const lexicalDiversity = totalWords > 0 ? uniqueWords / totalWords : 0;

  // Average sentence length, in words
  const avgSentenceLength = totalSentences > 0 ? totalWords / totalSentences : 0;

  // Syntactic complexity (approximate): formal connectors per sentence
  const complexConnectors = (totalText.match(/néanmoins|cependant|par conséquent|en outre|toutefois/gi) || []).length;
  const syntacticComplexity = totalSentences > 0 ? complexConnectors / totalSentences : 0;

  // Weighted global score; sentence length saturates at 100 words
  const complexityScore = (
    (lexicalDiversity * 0.4) +
    (Math.min(avgSentenceLength / 100, 1) * 0.3) +
    (syntacticComplexity * 0.3)
  );

  const complexity = {
    totalWords,
    totalSentences,
    totalParagraphs,
    avgSentenceLength,
    lexicalDiversity,
    syntacticComplexity,
    complexityScore,
    level: complexityScore > 0.7 ? 'high' : complexityScore > 0.4 ? 'medium' : 'low'
  };

  logSh(` 📊 Complexité: ${complexity.level} (score: ${complexityScore.toFixed(2)})`, 'DEBUG');
  logSh(` 📝 ${totalWords} mots, ${totalSentences} phrases, diversité: ${lexicalDiversity.toFixed(2)}`, 'DEBUG');

  return complexity;
}
/**
 * READABILITY SCORE
 * Flesch-style readability estimate, normalised to the 0-1 range.
 *
 * The text is first stripped of everything except letters, digits,
 * underscore, whitespace and sentence terminators (. ! ?). The stripping is
 * Unicode-aware so that accented letters (é, è, à, ç, ...) survive and are
 * seen by the syllable counter; an ASCII-only `\w` would delete them.
 *
 * NOTE(review): the coefficients 206.835 / 1.015 / 84.6 look like the standard
 * Flesch Reading Ease constants — confirm whether a French-specific
 * calibration was intended.
 *
 * @param {string} text - Text to analyse
 * @returns {number} - Readability score (0-1); 0 for empty or non-string input
 */
function calculateReadabilityScore(text) {
  if (typeof text !== 'string' || text.trim().length === 0) {
    return 0;
  }

  // Text clean-up (keeps accented letters)
  const cleanText = text.replace(/[^\p{L}\p{N}_\s.!?]/gu, '');

  // Base counts
  const sentences = cleanText.split(/[.!?]+/).filter(s => s.trim().length > 0);
  const words = cleanText.split(/\s+/).filter(w => w.length > 0);
  const syllables = countSyllables(cleanText);

  if (sentences.length === 0 || words.length === 0) {
    return 0;
  }

  const avgWordsPerSentence = words.length / sentences.length;
  const avgSyllablesPerWord = syllables / words.length;

  const fleschScore = 206.835 - (1.015 * avgWordsPerSentence) - (84.6 * avgSyllablesPerWord);

  // Normalise to 0-1 (a Flesch score of 100 maps to 1) and clamp
  const normalizedScore = Math.max(0, Math.min(1, fleschScore / 100));

  logSh(` 📖 Lisibilité: ${normalizedScore.toFixed(2)} (mots/phrase: ${avgWordsPerSentence.toFixed(1)}, syll/mot: ${avgSyllablesPerWord.toFixed(1)})`, 'DEBUG');

  return normalizedScore;
}
/**
 * SYLLABLE COUNT (APPROXIMATE, FRENCH)
 * Heuristic: every vowel letter counts as one syllable, then multi-letter
 * vowel sequences that are pronounced as a single syllable are corrected
 * downwards, and each word-final "e" is discounted by half a syllable.
 *
 * Each sequence correction equals 1 minus the number of vowel letters in the
 * sequence, so that the whole sequence ends up counted once ("eau" has three
 * vowel letters, hence -2). The final-"e" rule is applied to every word, not
 * only to the last character of the text.
 *
 * @param {string} text - Text to analyse
 * @returns {number} - Estimated syllable count, rounded, never below 1
 */
function countSyllables(text) {
  if (typeof text !== 'string' || text.length === 0) {
    return 1;
  }

  const vowelLetters = text.match(/[aeiouyàáâäèéêëìíîïòóôöùúûü]/gi) || [];
  let syllables = vowelLetters.length;

  const corrections = [
    { pattern: /ion/gi, adjustment: -1 }, // "tion": 2 vowel letters, 1 syllable
    { pattern: /ieu/gi, adjustment: -2 }, // "ieux": 3 vowel letters, 1 syllable
    { pattern: /eau/gi, adjustment: -2 }, // "eau": 3 vowel letters, 1 syllable
    { pattern: /ai/gi, adjustment: -1 }, // "ai": 2 vowel letters, 1 syllable
    { pattern: /ou/gi, adjustment: -1 }, // "ou": 2 vowel letters, 1 syllable
    // Word-final "e" (often silent): an "e" not followed by another letter or
    // digit. Unicode-aware so that "e" before "ç"/"é" is not taken as final.
    { pattern: /e(?![\p{L}\p{N}_])/giu, adjustment: -0.5 }
  ];

  corrections.forEach(correction => {
    const matches = text.match(correction.pattern) || [];
    syllables += matches.length * correction.adjustment;
  });

  return Math.max(1, Math.round(syllables));
}
/**
 * KEYWORD PRESERVATION
 * Measures how many keywords of the original text are still present, as
 * whole words, in the modified text. Comparison is case-insensitive.
 *
 * The final score is 60% overall preservation + 40% preservation of the
 * keywords related to CRITICAL_KEYWORDS (a keyword is "critical" when it
 * contains, or is contained in, one of those entries). When the original has
 * no critical keyword, the critical part counts as fully preserved.
 *
 * Both texts are tokenised with the same Unicode-aware word definition
 * (letters, digits, underscore) so accented words such as "métal" or
 * "qualité" are handled as whole words rather than being cut at the accent.
 *
 * @param {string} originalText - Original text
 * @param {string} modifiedText - Modified text
 * @returns {number} - Preservation score (0-1); 0 when either text is missing
 */
function preserveKeywords(originalText, modifiedText) {
  if (!originalText || !modifiedText) {
    return 0;
  }

  // Keywords of the original, and the set of whole words of the modified text
  const originalKeywords = extractKeywords(originalText.toLowerCase());
  const modifiedWords = new Set(modifiedText.toLowerCase().match(/[\p{L}\p{N}_]+/gu) || []);
  const criticalKeywords = CRITICAL_KEYWORDS.map(ck => ck.toLowerCase());

  let preservedCount = 0;
  let criticalPreservedCount = 0;
  let criticalTotalCount = 0;

  originalKeywords.forEach(keyword => {
    const isCritical = criticalKeywords.some(ck => keyword.includes(ck) || ck.includes(keyword));
    if (isCritical) {
      criticalTotalCount++;
    }

    // Whole-word presence: set membership is equivalent to a word-boundary
    // search and avoids building a regex from text-derived input.
    if (modifiedWords.has(keyword)) {
      preservedCount++;
      if (isCritical) {
        criticalPreservedCount++;
      }
    }
  });

  // Weighted score with extra weight on critical keywords
  const basicPreservation = preservedCount / Math.max(1, originalKeywords.length);
  const criticalPreservation = criticalTotalCount > 0 ?
    criticalPreservedCount / criticalTotalCount : 1.0;
  const finalScore = (basicPreservation * 0.6) + (criticalPreservation * 0.4);

  logSh(` 🔑 Mots-clés: ${preservedCount}/${originalKeywords.length} préservés (${criticalPreservedCount}/${criticalTotalCount} critiques)`, 'DEBUG');
  logSh(` 🎯 Score préservation: ${finalScore.toFixed(2)}`, 'DEBUG');

  return finalScore;
}

/**
 * SIMPLE KEYWORD EXTRACTION
 * Returns up to 20 distinct words of at least 4 characters, in order of first
 * appearance, excluding a short list of common French words. Words are
 * sequences of Unicode letters, digits or underscores, so accented words are
 * kept intact.
 *
 * @param {string} text - Text to scan (callers in this file pass it lower-cased)
 * @returns {string[]} - Extracted keywords; empty array for non-string input
 */
function extractKeywords(text) {
  if (typeof text !== 'string') {
    return [];
  }

  // Greedy match over maximal word runs: runs shorter than 4 never match
  const words = text.match(/[\p{L}\p{N}_]{4,}/gu) || [];

  // Common French words to ignore
  const stopWords = new Set([
    'avec', 'dans', 'pour', 'cette', 'sont', 'tout', 'mais', 'plus', 'très',
    'bien', 'encore', 'aussi', 'comme', 'après', 'avant', 'entre', 'depuis'
  ]);

  // A Set keeps first-insertion order, giving de-duplication in one pass
  const uniqueKeywords = new Set(words.filter(word => !stopWords.has(word.toLowerCase())));

  return [...uniqueKeywords].slice(0, 20); // Cap at 20 keywords
}
/**
 * SIMULATION QUALITY VALIDATION
 * Scores simulated content against the original and decides whether it is
 * acceptable.
 *
 * Global score = 0.4 * readability + 0.4 * keyword preservation + 0.2 when
 * the similarity lies inside the configured [minimum, maximum] window. The
 * window is inclusive on both ends and is the same test that is reported as
 * `details.similarityOk`, so content whose similarity equals the configured
 * maximum still earns the similarity bonus.
 *
 * @param {string} originalContent - Original content
 * @param {string} simulatedContent - Simulated content
 * @param {number} qualityThreshold - Minimum global score to accept (default 0.7)
 * @returns {object} - Validation result: { acceptable, globalScore,
 *   readabilityScore, keywordScore, similarityScore, reason, details }; when
 *   either content is missing only { acceptable: false, reason } is returned
 */
function validateSimulationQuality(originalContent, simulatedContent, qualityThreshold = 0.7) {
  if (!originalContent || !simulatedContent) {
    return { acceptable: false, reason: 'Contenu manquant' };
  }

  logSh('🎯 Validation qualité simulation', 'DEBUG');

  // Quality metrics
  const readabilityScore = calculateReadabilityScore(simulatedContent);
  const keywordScore = preserveKeywords(originalContent, simulatedContent);
  const similarityScore = calculateSimilarity(originalContent, simulatedContent);

  // Single source of truth for the similarity window (inclusive bounds)
  const similarityOk = similarityScore >= QUALITY_THRESHOLDS.similarity.minimum &&
    similarityScore <= QUALITY_THRESHOLDS.similarity.maximum;

  // Weighted global score
  const globalScore = (
    readabilityScore * 0.4 +
    keywordScore * 0.4 +
    (similarityOk ? 0.2 : 0)
  );

  const acceptable = globalScore >= qualityThreshold;

  const validation = {
    acceptable,
    globalScore,
    readabilityScore,
    keywordScore,
    similarityScore,
    reason: acceptable ? 'Qualité acceptable' : determineQualityIssue(readabilityScore, keywordScore, similarityScore),
    details: {
      readabilityOk: readabilityScore >= QUALITY_THRESHOLDS.readability.minimum,
      keywordsOk: keywordScore >= QUALITY_THRESHOLDS.keywordPreservation.minimum,
      similarityOk
    }
  };

  logSh(` 🎯 Validation: ${acceptable ? 'ACCEPTÉ' : 'REJETÉ'} (score: ${globalScore.toFixed(2)})`, acceptable ? 'INFO' : 'WARNING');
  logSh(` 📊 Lisibilité: ${readabilityScore.toFixed(2)} | Mots-clés: ${keywordScore.toFixed(2)} | Similarité: ${similarityScore.toFixed(2)}`, 'DEBUG');

  return validation;
}
/**
 * APPROXIMATE SIMILARITY
 * Jaccard index over the sets of lower-cased, whitespace-separated words of
 * both texts: |shared words| / |all distinct words|.
 *
 * Empty tokens produced by leading/trailing whitespace are ignored so that
 * surrounding whitespace does not change the result. Punctuation is not
 * stripped: "mot." and "mot" are different words.
 *
 * @param {string} text1 - First text
 * @param {string} text2 - Second text
 * @returns {number} - Similarity (0-1); 1 when neither text contains a word
 */
function calculateSimilarity(text1, text2) {
  const toWordSet = text =>
    new Set(String(text == null ? '' : text).toLowerCase().split(/\s+/).filter(Boolean));

  const words1 = toWordSet(text1);
  const words2 = toWordSet(text2);

  const union = new Set([...words1, ...words2]);
  if (union.size === 0) {
    // Two word-less texts are treated as identical (avoids 0/0)
    return 1;
  }

  const sharedCount = [...words1].filter(word => words2.has(word)).length;
  return sharedCount / union.size;
}
/**
 * IDENTIFY THE QUALITY PROBLEM
 * Walks an ordered list of threshold checks and returns the message of the
 * first one that fails; falls back to a generic message when none does.
 *
 * @param {number} readabilityScore - Readability score
 * @param {number} keywordScore - Keyword-preservation score
 * @param {number} similarityScore - Similarity score
 * @returns {string} - Reason describing the first failing check
 */
function determineQualityIssue(readabilityScore, keywordScore, similarityScore) {
  // Predicates are lazy so thresholds are read only when a rule is reached,
  // in the same order as a chain of early returns.
  const rules = [
    {
      failed: () => readabilityScore < QUALITY_THRESHOLDS.readability.minimum,
      message: 'Lisibilité insuffisante'
    },
    {
      failed: () => keywordScore < QUALITY_THRESHOLDS.keywordPreservation.minimum,
      message: 'Mots-clés mal préservés'
    },
    {
      failed: () => similarityScore < QUALITY_THRESHOLDS.similarity.minimum,
      message: 'Trop différent de l\'original'
    },
    {
      failed: () => similarityScore > QUALITY_THRESHOLDS.similarity.maximum,
      message: 'Pas assez modifié'
    }
  ];

  const firstBroken = rules.find(rule => rule.failed());
  return firstBroken ? firstBroken.message : 'Score global insuffisant';
}
/**
 * DETAILED QUALITY REPORT
 * Builds a report made of a global complexity analysis, per-element metrics
 * (readability, complexity score, word count) for every string value of
 * `content`, and automatic recommendations.
 *
 * Missing inputs are tolerated: a missing/non-object `content` is analysed as
 * an empty object and a missing `simulationStats` simply produces no
 * stats-based recommendation. Word counts ignore the empty tokens produced by
 * leading/trailing whitespace.
 *
 * @param {object} content - Map of element key -> text; non-string values are skipped
 * @param {object} simulationStats - Simulation stats; `fatigueModifications` is read if present
 * @returns {object} - Report: { timestamp, contentAnalysis, simulationStats,
 *   qualityMetrics, recommendations }
 */
function generateQualityReport(content, simulationStats) {
  const safeContent = content && typeof content === 'object' ? content : {};

  const report = {
    timestamp: new Date().toISOString(),
    contentAnalysis: analyzeContentComplexity(safeContent),
    simulationStats,
    qualityMetrics: {},
    recommendations: []
  };

  // Per-element analysis
  Object.entries(safeContent).forEach(([key, elementContent]) => {
    if (typeof elementContent !== 'string') {
      return;
    }
    const readability = calculateReadabilityScore(elementContent);
    const complexity = analyzeContentComplexity({ [key]: elementContent });
    report.qualityMetrics[key] = {
      readability,
      complexity: complexity.complexityScore,
      wordCount: elementContent.split(/\s+/).filter(Boolean).length
    };
  });

  // Automatic recommendations
  if (report.contentAnalysis.complexityScore > 0.8) {
    report.recommendations.push('Simplifier le vocabulaire pour améliorer la lisibilité');
  }
  if (simulationStats && simulationStats.fatigueModifications < 1) {
    report.recommendations.push('Augmenter l\'intensité de simulation fatigue');
  }

  return report;
}
/**
 * STATISTICS HELPERS
 * Summarises a list of numbers: mean, median, minimum, maximum and standard
 * deviation (delegated to calculateStandardDeviation).
 *
 * An empty or missing list yields all-zero statistics instead of a mix of
 * NaN and undefined values. The input is never mutated.
 *
 * @param {number[]} values - Numbers to summarise
 * @returns {object} - { mean, median, min, max, stdDev }
 */
function calculateStatistics(values) {
  if (!values || values.length === 0) {
    return { mean: 0, median: 0, min: 0, max: 0, stdDev: 0 };
  }

  const numbers = Array.from(values);
  const sorted = numbers.slice().sort((a, b) => a - b);
  const count = numbers.length;
  const middle = Math.floor(count / 2);

  return {
    mean: numbers.reduce((sum, val) => sum + val, 0) / count,
    // Even count: average of the two central values; odd count: central value
    median: count % 2 === 0 ?
      (sorted[middle - 1] + sorted[middle]) / 2 :
      sorted[middle],
    min: sorted[0],
    max: sorted[count - 1],
    stdDev: calculateStandardDeviation(values)
  };
}
/**
 * Population standard deviation (variance divided by N, not N - 1).
 *
 * @param {number[]} values - Numbers to analyse
 * @returns {number} - Standard deviation; 0 for an empty or missing list
 */
function calculateStandardDeviation(values) {
  if (!values || values.length === 0) {
    // No data: report no dispersion instead of NaN (0 / 0)
    return 0;
  }

  const count = values.length;
  const mean = values.reduce((sum, val) => sum + val, 0) / count;
  const variance = values.reduce((sum, val) => sum + (val - mean) ** 2, 0) / count;

  return Math.sqrt(variance);
}
// ============= EXPORTS =============
// CommonJS public surface of this module: every function defined in this
// file plus the two shared configuration constants.
module.exports = {
// Analysis, scoring and validation functions
analyzeContentComplexity,
calculateReadabilityScore,
preserveKeywords,
validateSimulationQuality,
generateQualityReport,
// Statistics helpers
calculateStatistics,
calculateStandardDeviation,
// Lower-level helpers used by the functions above
countSyllables,
extractKeywords,
calculateSimilarity,
determineQualityIssue,
// Configuration constants
QUALITY_THRESHOLDS,
CRITICAL_KEYWORDS
};