// ========================================
// FICHIER: HumanSimulationUtils.js
// RESPONSABILITÉ: Utilitaires partagés Human Simulation
// Fonctions d'analyse, validation et helpers
// ========================================

// Project logger used by every helper in this module: logSh(message, level).
const { logSh } = require('../ErrorReporting');

/**
 * QUALITY THRESHOLDS
 * Limits the validation helpers compare their scores against.
 * Every value sits on the same 0-1 scale as the scores themselves.
 */
const QUALITY_THRESHOLDS = {
  readability: {
    minimum: 0.3, // Relaxed (was 0.6)
    good: 0.6,
    excellent: 0.8
  },
  keywordPreservation: {
    minimum: 0.7, // Relaxed (was 0.8)
    good: 0.9,
    excellent: 0.95
  },
  similarity: {
    minimum: 0.5, // Relaxed (was 0.7)
    maximum: 1.0 // Identical content is accepted (was 0.95)
  }
};

/**
 * KEYWORDS THAT MUST BE PRESERVED
 * Keywords matching one of these entries get their own preservation ratio
 * in preserveKeywords(). Comparison there is case-insensitive.
 */
const CRITICAL_KEYWORDS = [
  // Generic SEO keywords
  'plaque', 'personnalisée', 'gravure', 'métal', 'bois', 'acrylique',
  'design', 'qualité', 'fabrication', 'artisanal', 'sur-mesure',
  // Important technical terms
  'laser', 'CNC', 'impression', 'découpe', 'finition', 'traitement',
  // Commercial terms
  'prix', 'tarif', 'devis', 'livraison', 'garantie', 'service'
];

/**
 * CONTENT COMPLEXITY ANALYSIS
 * Computes rough complexity metrics over every string value of `content`.
 * Non-string values are ignored; a null/undefined `content` is treated as empty.
 *
 * @param {object} content - Map of element keys to text
 * @returns {object} - Metrics: totalWords, totalSentences, totalParagraphs,
 *   avgSentenceLength, lexicalDiversity, syntacticComplexity, complexityScore
 *   and level ('low' | 'medium' | 'high')
 */
function analyzeContentComplexity(content) {
  logSh('🔍 Analyse complexité contenu', 'DEBUG');

  const contentArray = Object.values(content || {}).filter(c => typeof c === 'string');
  const totalText = contentArray.join(' ');

  // Base metrics. Empty tokens are dropped: ''.split(/\s+/) yields [''] and the
  // segment after the final '.', '!' or '?' is empty, so counting raw split
  // results reports one word / one sentence too many.
  const words = totalText.split(/\s+/).filter(w => w.length > 0);
  const sentences = totalText.split(/[.!?]+/).filter(s => s.trim().length > 0);
  const totalWords = words.length;
  const totalSentences = sentences.length;
  const totalParagraphs = contentArray.length;

  // Lexical complexity: share of distinct (case-insensitive) tokens.
  const uniqueWords = new Set(words.map(w => w.toLowerCase())).size;
  const lexicalDiversity = totalWords > 0 ? uniqueWords / totalWords : 0;

  // Average sentence length, in words.
  const avgSentenceLength = totalSentences > 0 ? totalWords / totalSentences : 0;

  // Syntactic complexity (approximate): formal connectors per sentence.
  const complexConnectors = (totalText.match(/néanmoins|cependant|par conséquent|en outre|toutefois/gi) || []).length;
  const syntacticComplexity = totalSentences > 0 ? complexConnectors / totalSentences : 0;

  // Overall weighted score; the sentence-length term saturates at 100 words.
  const complexityScore = (
    (lexicalDiversity * 0.4) +
    (Math.min(avgSentenceLength / 100, 1) * 0.3) +
    (syntacticComplexity * 0.3)
  );

  const complexity = {
    totalWords,
    totalSentences,
    totalParagraphs,
    avgSentenceLength,
    lexicalDiversity,
    syntacticComplexity,
    complexityScore,
    level: complexityScore > 0.7 ? 'high' : complexityScore > 0.4 ? 'medium' : 'low'
  };

  logSh(` 📊 Complexité: ${complexity.level} (score: ${complexityScore.toFixed(2)})`, 'DEBUG');
  logSh(` 📝 ${totalWords} mots, ${totalSentences} phrases, diversité: ${lexicalDiversity.toFixed(2)}`, 'DEBUG');

  return complexity;
}

/**
 * READABILITY SCORE
 * Flesch-style reading-ease estimate, normalised to the 0-1 range.
 *
 * @param {string} text - Text to analyse
 * @returns {number} - Readability score (0-1); 0 for empty or non-string input
 */
function calculateReadabilityScore(text) {
  if (typeof text !== 'string' || text.trim().length === 0) {
    return 0;
  }

  // Text clean-up: keep word characters, accented Latin letters (plus œ/Œ),
  // whitespace and sentence punctuation. \w alone is ASCII-only in JavaScript
  // and would delete every accented letter from French text before counting.
  const cleanText = text.replace(/[^\w\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0152\u0153\s.!?]/g, '');

  // Base counts
  const sentences = cleanText.split(/[.!?]+/).filter(s => s.trim().length > 0);
  const words = cleanText.split(/\s+/).filter(w => w.length > 0);
  const syllables = countSyllables(cleanText);

  if (sentences.length === 0 || words.length === 0) {
    return 0;
  }

  const avgWordsPerSentence = words.length / sentences.length;
  const avgSyllablesPerWord = syllables / words.length;

  // 206.835 / 1.015 / 84.6 are the coefficients of the standard Flesch
  // Reading Ease formula.
  const fleschScore = 206.835 - (1.015 * avgWordsPerSentence) - (84.6 * avgSyllablesPerWord);

  // Normalise to 0-1 (a Flesch score of 100 or more maps to 1).
  const normalizedScore = Math.max(0, Math.min(1, fleschScore / 100));

  logSh(` 📖 Lisibilité: ${normalizedScore.toFixed(2)} (mots/phrase: ${avgWordsPerSentence.toFixed(1)}, syll/mot: ${avgSyllablesPerWord.toFixed(1)})`, 'DEBUG');

  return normalizedScore;
}

/**
 * SYLLABLE COUNT (APPROXIMATE, FRENCH)
 * Counts vowel letters, then subtracts for letter groups that form a single
 * syllable and for a word-final unaccented "e".
 *
 * @param {string} text - Text to analyse (non-strings are treated as empty)
 * @returns {number} - Estimated syllable count, never less than 1
 */
function countSyllables(text) {
  const source = typeof text === 'string' ? text : '';

  // Start from one syllable per vowel letter.
  let syllables = (source.match(/[aeiouyàáâäèéêëìíîïòóôöùúûü]/gi) || []).length;

  // Each adjustment brings a multi-vowel group down to ONE syllable:
  // two-vowel groups lose 1, three-vowel groups lose 2.
  const corrections = [
    { pattern: /ion/gi, adjustment: -1 }, // "tion" = 1 syllable, not 2
    { pattern: /ieu/gi, adjustment: -2 }, // "ieux" = 1 syllable
    { pattern: /eau/gi, adjustment: -2 }, // "eau" = 1 syllable
    { pattern: /ai/gi, adjustment: -1 }, // "ai" = 1 syllable
    { pattern: /ou/gi, adjustment: -1 } // "ou" = 1 syllable
  ];

  corrections.forEach(correction => {
    const matches = source.match(correction.pattern) || [];
    syllables += matches.length * correction.adjustment;
  });

  // Mute final "e", applied per word. A bare /e$/ would only look at the end
  // of the whole text. Words of one or two letters ("le", "de") are skipped
  // because their "e" is the only vowel.
  const words = source.match(/[a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0152\u0153]+/gi) || [];
  const muteFinalE = words.filter(word => word.length > 2 && /e$/i.test(word)).length;
  syllables -= muteFinalE * 0.5;

  return Math.max(1, Math.round(syllables));
}

/**
 * Splits text into word tokens.
 * A token is a run of ASCII word characters and accented Latin letters
 * (plus œ/Œ). JavaScript's \w and \b are ASCII-only, so relying on them cuts
 * French words at every accent ("métal" -> "m", "tal").
 *
 * @param {string} text - Text to split
 * @returns {string[]} - Tokens in order of appearance
 */
function tokenizeWords(text) {
  return text.match(/[\w\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0152\u0153]+/g) || [];
}

/**
 * KEYWORD PRESERVATION
 * Measures how many keywords of the original text still appear, as whole
 * words, in the modified text. The result blends the overall ratio (60%)
 * with the ratio for keywords related to CRITICAL_KEYWORDS (40%).
 *
 * @param {string} originalText - Original text
 * @param {string} modifiedText - Modified text
 * @returns {number} - Preservation score (0-1); 0 when either text is empty
 *   or not a string, 1 when the original contains no keyword at all
 */
function preserveKeywords(originalText, modifiedText) {
  if (typeof originalText !== 'string' || typeof modifiedText !== 'string' ||
      originalText.length === 0 || modifiedText.length === 0) {
    return 0;
  }

  // Keywords are extracted from the lower-cased original, so they are
  // already lower case below.
  const originalKeywords = extractKeywords(originalText.toLowerCase());

  // One pass over the modified text; membership then replaces building a
  // RegExp per keyword.
  const modifiedWords = new Set(tokenizeWords(modifiedText.toLowerCase()));
  const criticalLower = CRITICAL_KEYWORDS.map(ck => ck.toLowerCase());

  let preservedCount = 0;
  let criticalPreservedCount = 0;
  let criticalTotalCount = 0;

  originalKeywords.forEach(keyword => {
    // Substring match in both directions so inflected forms still count as
    // critical.
    const isCritical = criticalLower.some(ck => keyword.includes(ck) || ck.includes(keyword));

    if (isCritical) {
      criticalTotalCount++;
    }

    if (modifiedWords.has(keyword)) {
      preservedCount++;
      if (isCritical) {
        criticalPreservedCount++;
      }
    }
  });

  // With nothing to preserve, both ratios count as fully preserved.
  const basicPreservation = originalKeywords.length > 0 ?
    preservedCount / originalKeywords.length : 1.0;
  const criticalPreservation = criticalTotalCount > 0 ?
    criticalPreservedCount / criticalTotalCount : 1.0;

  const finalScore = (basicPreservation * 0.6) + (criticalPreservation * 0.4);

  logSh(` 🔑 Mots-clés: ${preservedCount}/${originalKeywords.length} préservés (${criticalPreservedCount}/${criticalTotalCount} critiques)`, 'DEBUG');
  logSh(` 🎯 Score préservation: ${finalScore.toFixed(2)}`, 'DEBUG');

  return finalScore;
}

/**
 * SIMPLE KEYWORD EXTRACTION
 * Returns up to 20 distinct words of four or more characters, in order of
 * first appearance, excluding common French stop words.
 *
 * @param {string} text - Text to scan
 * @returns {string[]} - Extracted keywords ([] for non-string input)
 */
function extractKeywords(text) {
  if (typeof text !== 'string') {
    return [];
  }

  // Common French words that carry no keyword value.
  const stopWords = new Set([
    'avec', 'dans', 'pour', 'cette', 'sont', 'tout', 'mais', 'plus', 'très',
    'bien', 'encore', 'aussi', 'comme', 'après', 'avant', 'entre', 'depuis'
  ]);

  const candidates = tokenizeWords(text)
    .filter(word => word.length >= 4 && !stopWords.has(word.toLowerCase()));

  // A Set keeps first-insertion order, so de-duplication preserves the order
  // of first appearance.
  return Array.from(new Set(candidates)).slice(0, 20);
}

/**
 * SIMULATION QUALITY VALIDATION
 * Scores simulated content against the original: readability (40%), keyword
 * preservation (40%) and a flat 0.2 bonus when the similarity lies inside the
 * [minimum, maximum] range of QUALITY_THRESHOLDS.similarity.
 *
 * @param {string} originalContent - Original content
 * @param {string} simulatedContent - Simulated content
 * @param {number} qualityThreshold - Minimum global score to be acceptable
 * @returns {object} - Validation result; only { acceptable, reason } when
 *   either content is missing
 */
function validateSimulationQuality(originalContent, simulatedContent, qualityThreshold = 0.7) {
  if (!originalContent || !simulatedContent) {
    return { acceptable: false, reason: 'Contenu manquant' };
  }

  logSh('🎯 Validation qualité simulation', 'DEBUG');

  // Quality metrics
  const readabilityScore = calculateReadabilityScore(simulatedContent);
  const keywordScore = preserveKeywords(originalContent, simulatedContent);
  const similarityScore = calculateSimilarity(originalContent, simulatedContent);

  // Bounds are inclusive, matching determineQualityIssue() and the details
  // below. With maximum = 1.0, strict comparisons would deny the bonus to
  // identical content, which the threshold is meant to accept.
  const similarityOk = similarityScore >= QUALITY_THRESHOLDS.similarity.minimum &&
    similarityScore <= QUALITY_THRESHOLDS.similarity.maximum;

  // Weighted global score
  const globalScore = (
    readabilityScore * 0.4 +
    keywordScore * 0.4 +
    (similarityOk ? 0.2 : 0)
  );

  const acceptable = globalScore >= qualityThreshold;

  const validation = {
    acceptable,
    globalScore,
    readabilityScore,
    keywordScore,
    similarityScore,
    reason: acceptable ? 'Qualité acceptable' : determineQualityIssue(readabilityScore, keywordScore, similarityScore),
    details: {
      readabilityOk: readabilityScore >= QUALITY_THRESHOLDS.readability.minimum,
      keywordsOk: keywordScore >= QUALITY_THRESHOLDS.keywordPreservation.minimum,
      similarityOk
    }
  };

  logSh(` 🎯 Validation: ${acceptable ? 'ACCEPTÉ' : 'REJETÉ'} (score: ${globalScore.toFixed(2)})`, acceptable ? 'INFO' : 'WARNING');
  logSh(` 📊 Lisibilité: ${readabilityScore.toFixed(2)} | Mots-clés: ${keywordScore.toFixed(2)} | Similarité: ${similarityScore.toFixed(2)}`, 'DEBUG');

  return validation;
}

/**
 * APPROXIMATE SIMILARITY
 * Jaccard index over the sets of lower-cased, whitespace-separated tokens:
 * |shared tokens| / |all distinct tokens|.
 *
 * @param {string} text1 - First text (non-strings are treated as empty)
 * @param {string} text2 - Second text (non-strings are treated as empty)
 * @returns {number} - Similarity (0-1); 1 when neither text contains a token
 */
function calculateSimilarity(text1, text2) {
  // Empty tokens are dropped: leading/trailing whitespace (or an empty
  // string) would otherwise add '' to a set and skew the ratio.
  const toWordSet = (text) => new Set(
    typeof text === 'string' ? text.toLowerCase().split(/\s+/).filter(word => word.length > 0) : []
  );

  const words1 = toWordSet(text1);
  const words2 = toWordSet(text2);

  if (words1.size === 0 && words2.size === 0) {
    return 1;
  }

  let sharedCount = 0;
  words1.forEach(word => {
    if (words2.has(word)) {
      sharedCount++;
    }
  });

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = words1.size + words2.size - sharedCount;

  return sharedCount / unionSize;
}

/**
 * DETERMINE QUALITY ISSUE
 * Returns the message for the first metric that falls outside its
 * QUALITY_THRESHOLDS bounds, checked in the order: readability, keyword
 * preservation, similarity too low, similarity too high.
 *
 * @param {number} readabilityScore - Readability score
 * @param {number} keywordScore - Keyword preservation score
 * @param {number} similarityScore - Similarity score
 * @returns {string} - Reason message; a generic one when every metric is
 *   within bounds
 */
function determineQualityIssue(readabilityScore, keywordScore, similarityScore) {
  if (readabilityScore < QUALITY_THRESHOLDS.readability.minimum) {
    return 'Lisibilité insuffisante';
  }
  if (keywordScore < QUALITY_THRESHOLDS.keywordPreservation.minimum) {
    return 'Mots-clés mal préservés';
  }
  if (similarityScore < QUALITY_THRESHOLDS.similarity.minimum) {
    return 'Trop différent de l\'original';
  }
  if (similarityScore > QUALITY_THRESHOLDS.similarity.maximum) {
    return 'Pas assez modifié';
  }
  return 'Score global insuffisant';
}

/**
 * DETAILED QUALITY REPORT
 * Builds a report with the overall complexity analysis, per-element metrics
 * for every string value of `content`, and automatic recommendations.
 *
 * @param {object} content - Map of element keys to text (null/undefined is
 *   treated as empty)
 * @param {object} simulationStats - Simulation stats; may be omitted
 * @returns {object} - Report: timestamp, contentAnalysis, simulationStats,
 *   qualityMetrics, recommendations
 */
function generateQualityReport(content, simulationStats) {
  const safeContent = content || {};

  const report = {
    timestamp: new Date().toISOString(),
    contentAnalysis: analyzeContentComplexity(safeContent),
    simulationStats,
    qualityMetrics: {},
    recommendations: []
  };

  // Per-element analysis
  Object.entries(safeContent).forEach(([key, elementContent]) => {
    if (typeof elementContent === 'string') {
      const readability = calculateReadabilityScore(elementContent);
      const complexity = analyzeContentComplexity({ [key]: elementContent });

      report.qualityMetrics[key] = {
        readability,
        complexity: complexity.complexityScore,
        // Empty tokens (from surrounding whitespace or an empty string) are
        // not words.
        wordCount: elementContent.split(/\s+/).filter(word => word.length > 0).length
      };
    }
  });

  // Automatic recommendations
  if (report.contentAnalysis.complexityScore > 0.8) {
    report.recommendations.push('Simplifier le vocabulaire pour améliorer la lisibilité');
  }

  // Missing stats produce no fatigue recommendation instead of a TypeError.
  if (simulationStats && simulationStats.fatigueModifications < 1) {
    report.recommendations.push('Augmenter l\'intensité de simulation fatigue');
  }

  return report;
}

/**
 * STATISTICS HELPERS
 * Summary statistics for a list of numbers. The input is not modified.
 *
 * @param {number[]} values - Numbers to summarise
 * @returns {object} - { mean, median, min, max, stdDev }; all zeros when
 *   `values` is missing or empty
 */
function calculateStatistics(values) {
  // An empty list would otherwise give NaN mean/median/stdDev and undefined
  // min/max.
  if (values == null || !(values.length > 0)) {
    return { mean: 0, median: 0, min: 0, max: 0, stdDev: 0 };
  }

  // Sort a copy, numerically (the default sort compares as strings).
  const sorted = values.slice().sort((a, b) => a - b);
  const length = values.length;

  return {
    mean: values.reduce((sum, val) => sum + val, 0) / length,
    // Even length: average of the two middle values.
    median: length % 2 === 0 ?
      (sorted[length / 2 - 1] + sorted[length / 2]) / 2 :
      sorted[Math.floor(length / 2)],
    min: sorted[0],
    max: sorted[length - 1],
    stdDev: calculateStandardDeviation(values)
  };
}

/**
 * Population standard deviation (divides by N, not N - 1).
 *
 * @param {number[]} values - Numbers to measure
 * @returns {number} - Standard deviation; 0 when `values` is missing or empty
 */
function calculateStandardDeviation(values) {
  // Avoid 0 / 0 = NaN on an empty list.
  if (values == null || !(values.length > 0)) {
    return 0;
  }

  const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
  const squaredDifferences = values.map(val => Math.pow(val - mean, 2));
  const variance = squaredDifferences.reduce((sum, val) => sum + val, 0) / values.length;
  return Math.sqrt(variance);
}

// ============= EXPORTS =============
|
|
module.exports = {
|
|
analyzeContentComplexity,
|
|
calculateReadabilityScore,
|
|
preserveKeywords,
|
|
validateSimulationQuality,
|
|
generateQualityReport,
|
|
calculateStatistics,
|
|
calculateStandardDeviation,
|
|
countSyllables,
|
|
extractKeywords,
|
|
calculateSimilarity,
|
|
determineQualityIssue,
|
|
QUALITY_THRESHOLDS,
|
|
CRITICAL_KEYWORDS
|
|
}; |