// ======================================== // FICHIER: HumanSimulationUtils.js // RESPONSABILITÉ: Utilitaires partagés Human Simulation // Fonctions d'analyse, validation et helpers // ======================================== const { logSh } = require('../ErrorReporting'); /** * SEUILS DE QUALITÉ */ const QUALITY_THRESHOLDS = { readability: { minimum: 0.3, // FIXÉ: Plus permissif (était 0.6) good: 0.6, excellent: 0.8 }, keywordPreservation: { minimum: 0.7, // FIXÉ: Plus permissif (était 0.8) good: 0.9, excellent: 0.95 }, similarity: { minimum: 0.5, // FIXÉ: Plus permissif (était 0.7) maximum: 1.0 // FIXÉ: Accepter même contenu identique (était 0.95) } }; /** * MOTS-CLÉS À PRÉSERVER ABSOLUMENT */ const CRITICAL_KEYWORDS = [ // Mots-clés SEO génériques 'plaque', 'personnalisée', 'gravure', 'métal', 'bois', 'acrylique', 'design', 'qualité', 'fabrication', 'artisanal', 'sur-mesure', // Termes techniques importants 'laser', 'CNC', 'impression', 'découpe', 'finition', 'traitement', // Termes commerciaux 'prix', 'tarif', 'devis', 'livraison', 'garantie', 'service' ]; /** * ANALYSE COMPLEXITÉ CONTENU * @param {object} content - Contenu à analyser * @returns {object} - Métriques de complexité */ function analyzeContentComplexity(content) { logSh('🔍 Analyse complexité contenu', 'DEBUG'); const contentArray = Object.values(content).filter(c => typeof c === 'string'); const totalText = contentArray.join(' '); // Métriques de base const totalWords = totalText.split(/\s+/).length; const totalSentences = totalText.split(/[.!?]+/).length; const totalParagraphs = contentArray.length; // Complexité lexicale const uniqueWords = new Set(totalText.toLowerCase().split(/\s+/)).size; const lexicalDiversity = uniqueWords / totalWords; // Longueur moyenne des phrases const avgSentenceLength = totalWords / totalSentences; // Complexité syntaxique (approximative) const complexConnectors = (totalText.match(/néanmoins|cependant|par conséquent|en outre|toutefois/gi) || []).length; const syntacticComplexity = complexConnectors / totalSentences; // Score global de complexité const complexityScore = ( (lexicalDiversity * 0.4) + (Math.min(avgSentenceLength / 100, 1) * 0.3) + (syntacticComplexity * 0.3) ); const complexity = { totalWords, totalSentences, totalParagraphs, avgSentenceLength, lexicalDiversity, syntacticComplexity, complexityScore, level: complexityScore > 0.7 ? 'high' : complexityScore > 0.4 ? 'medium' : 'low' }; logSh(` 📊 Complexité: ${complexity.level} (score: ${complexityScore.toFixed(2)})`, 'DEBUG'); logSh(` 📝 ${totalWords} mots, ${totalSentences} phrases, diversité: ${lexicalDiversity.toFixed(2)}`, 'DEBUG'); return complexity; } /** * CALCUL SCORE LISIBILITÉ * Approximation de l'index Flesch-Kincaid adapté au français * @param {string} text - Texte à analyser * @returns {number} - Score lisibilité (0-1) */ function calculateReadabilityScore(text) { if (!text || text.trim().length === 0) { return 0; } // Nettoyage du texte const cleanText = text.replace(/[^\w\s.!?]/gi, ''); // Comptages de base const sentences = cleanText.split(/[.!?]+/).filter(s => s.trim().length > 0); const words = cleanText.split(/\s+/).filter(w => w.length > 0); const syllables = countSyllables(cleanText); if (sentences.length === 0 || words.length === 0) { return 0; } // Métriques Flesch-Kincaid adaptées français const avgWordsPerSentence = words.length / sentences.length; const avgSyllablesPerWord = syllables / words.length; // Formule adaptée (plus clémente que l'originale) const fleschScore = 206.835 - (1.015 * avgWordsPerSentence) - (84.6 * avgSyllablesPerWord); // Normalisation 0-1 (100 = parfait en Flesch) const normalizedScore = Math.max(0, Math.min(1, fleschScore / 100)); logSh(` 📖 Lisibilité: ${normalizedScore.toFixed(2)} (mots/phrase: ${avgWordsPerSentence.toFixed(1)}, syll/mot: ${avgSyllablesPerWord.toFixed(1)})`, 'DEBUG'); return normalizedScore; } /** * COMPTAGE SYLLABES (APPROXIMATIF FRANÇAIS) */ function countSyllables(text) { // Approximation pour le français const vowels = /[aeiouyàáâäèéêëìíîïòóôöùúûü]/gi; const vowelGroups = text.match(vowels) || []; // Approximation: 1 groupe de voyelles ≈ 1 syllabe // Ajustements pour le français let syllables = vowelGroups.length; // Corrections courantes const corrections = [ { pattern: /ion/gi, adjustment: 0 }, // "tion" = 1 syllabe, pas 2 { pattern: /ieu/gi, adjustment: -1 }, // "ieux" = 1 syllabe { pattern: /eau/gi, adjustment: -1 }, // "eau" = 1 syllabe { pattern: /ai/gi, adjustment: -1 }, // "ai" = 1 syllabe { pattern: /ou/gi, adjustment: -1 }, // "ou" = 1 syllabe { pattern: /e$/gi, adjustment: -0.5 } // "e" final muet ]; corrections.forEach(correction => { const matches = text.match(correction.pattern) || []; syllables += matches.length * correction.adjustment; }); return Math.max(1, Math.round(syllables)); } /** * PRÉSERVATION MOTS-CLÉS * @param {string} originalText - Texte original * @param {string} modifiedText - Texte modifié * @returns {number} - Score préservation (0-1) */ function preserveKeywords(originalText, modifiedText) { if (!originalText || !modifiedText) { return 0; } const originalLower = originalText.toLowerCase(); const modifiedLower = modifiedText.toLowerCase(); // Extraire mots-clés du texte original const originalKeywords = extractKeywords(originalLower); // Vérifier préservation let preservedCount = 0; let criticalPreservedCount = 0; let criticalTotalCount = 0; originalKeywords.forEach(keyword => { const isCritical = CRITICAL_KEYWORDS.some(ck => keyword.toLowerCase().includes(ck.toLowerCase()) || ck.toLowerCase().includes(keyword.toLowerCase()) ); if (isCritical) { criticalTotalCount++; } // Vérifier présence dans texte modifié const keywordRegex = new RegExp(`\\b${keyword}\\b`, 'gi'); if (modifiedLower.match(keywordRegex)) { preservedCount++; if (isCritical) { criticalPreservedCount++; } } }); // Score avec bonus pour mots-clés critiques const basicPreservation = preservedCount / Math.max(1, originalKeywords.length); const criticalPreservation = criticalTotalCount > 0 ? criticalPreservedCount / criticalTotalCount : 1.0; const finalScore = (basicPreservation * 0.6) + (criticalPreservation * 0.4); logSh(` 🔑 Mots-clés: ${preservedCount}/${originalKeywords.length} préservés (${criticalPreservedCount}/${criticalTotalCount} critiques)`, 'DEBUG'); logSh(` 🎯 Score préservation: ${finalScore.toFixed(2)}`, 'DEBUG'); return finalScore; } /** * EXTRACTION MOTS-CLÉS SIMPLES */ function extractKeywords(text) { // Mots de plus de 3 caractères, non vides const words = text.match(/\b\w{4,}\b/g) || []; // Filtrer mots courants français const stopWords = [ 'avec', 'dans', 'pour', 'cette', 'sont', 'tout', 'mais', 'plus', 'très', 'bien', 'encore', 'aussi', 'comme', 'après', 'avant', 'entre', 'depuis' ]; const keywords = words .filter(word => !stopWords.includes(word.toLowerCase())) .filter((word, index, array) => array.indexOf(word) === index) // Unique .slice(0, 20); // Limiter à 20 mots-clés return keywords; } /** * VALIDATION QUALITÉ SIMULATION * @param {string} originalContent - Contenu original * @param {string} simulatedContent - Contenu simulé * @param {number} qualityThreshold - Seuil qualité minimum * @returns {object} - Résultat validation */ function validateSimulationQuality(originalContent, simulatedContent, qualityThreshold = 0.7) { if (!originalContent || !simulatedContent) { return { acceptable: false, reason: 'Contenu manquant' }; } logSh('🎯 Validation qualité simulation', 'DEBUG'); // Métriques de qualité const readabilityScore = calculateReadabilityScore(simulatedContent); const keywordScore = preserveKeywords(originalContent, simulatedContent); const similarityScore = calculateSimilarity(originalContent, simulatedContent); // Score global pondéré const globalScore = ( readabilityScore * 0.4 + keywordScore * 0.4 + (similarityScore > QUALITY_THRESHOLDS.similarity.minimum && similarityScore < QUALITY_THRESHOLDS.similarity.maximum ? 0.2 : 0) ); const acceptable = globalScore >= qualityThreshold; const validation = { acceptable, globalScore, readabilityScore, keywordScore, similarityScore, reason: acceptable ? 'Qualité acceptable' : determineQualityIssue(readabilityScore, keywordScore, similarityScore), details: { readabilityOk: readabilityScore >= QUALITY_THRESHOLDS.readability.minimum, keywordsOk: keywordScore >= QUALITY_THRESHOLDS.keywordPreservation.minimum, similarityOk: similarityScore >= QUALITY_THRESHOLDS.similarity.minimum && similarityScore <= QUALITY_THRESHOLDS.similarity.maximum } }; logSh(` 🎯 Validation: ${acceptable ? 'ACCEPTÉ' : 'REJETÉ'} (score: ${globalScore.toFixed(2)})`, acceptable ? 'INFO' : 'WARNING'); logSh(` 📊 Lisibilité: ${readabilityScore.toFixed(2)} | Mots-clés: ${keywordScore.toFixed(2)} | Similarité: ${similarityScore.toFixed(2)}`, 'DEBUG'); return validation; } /** * CALCUL SIMILARITÉ APPROXIMATIVE */ function calculateSimilarity(text1, text2) { // Similarité basée sur les mots partagés (simple mais efficace) const words1 = new Set(text1.toLowerCase().split(/\s+/)); const words2 = new Set(text2.toLowerCase().split(/\s+/)); const intersection = new Set([...words1].filter(word => words2.has(word))); const union = new Set([...words1, ...words2]); return intersection.size / union.size; } /** * DÉTERMINER PROBLÈME QUALITÉ */ function determineQualityIssue(readabilityScore, keywordScore, similarityScore) { if (readabilityScore < QUALITY_THRESHOLDS.readability.minimum) { return 'Lisibilité insuffisante'; } if (keywordScore < QUALITY_THRESHOLDS.keywordPreservation.minimum) { return 'Mots-clés mal préservés'; } if (similarityScore < QUALITY_THRESHOLDS.similarity.minimum) { return 'Trop différent de l\'original'; } if (similarityScore > QUALITY_THRESHOLDS.similarity.maximum) { return 'Pas assez modifié'; } return 'Score global insuffisant'; } /** * GÉNÉRATION RAPPORT QUALITÉ DÉTAILLÉ * @param {object} content - Contenu à analyser * @param {object} simulationStats - Stats simulation * @returns {object} - Rapport détaillé */ function generateQualityReport(content, simulationStats) { const report = { timestamp: new Date().toISOString(), contentAnalysis: analyzeContentComplexity(content), simulationStats, qualityMetrics: {}, recommendations: [] }; // Analyse par élément Object.entries(content).forEach(([key, elementContent]) => { if (typeof elementContent === 'string') { const readability = calculateReadabilityScore(elementContent); const complexity = analyzeContentComplexity({ [key]: elementContent }); report.qualityMetrics[key] = { readability, complexity: complexity.complexityScore, wordCount: elementContent.split(/\s+/).length }; } }); // Recommandations automatiques if (report.contentAnalysis.complexityScore > 0.8) { report.recommendations.push('Simplifier le vocabulaire pour améliorer la lisibilité'); } if (simulationStats.fatigueModifications < 1) { report.recommendations.push('Augmenter l\'intensité de simulation fatigue'); } return report; } /** * HELPERS STATISTIQUES */ function calculateStatistics(values) { const sorted = values.slice().sort((a, b) => a - b); const length = values.length; return { mean: values.reduce((sum, val) => sum + val, 0) / length, median: length % 2 === 0 ? (sorted[length / 2 - 1] + sorted[length / 2]) / 2 : sorted[Math.floor(length / 2)], min: sorted[0], max: sorted[length - 1], stdDev: calculateStandardDeviation(values) }; } function calculateStandardDeviation(values) { const mean = values.reduce((sum, val) => sum + val, 0) / values.length; const squaredDifferences = values.map(val => Math.pow(val - mean, 2)); const variance = squaredDifferences.reduce((sum, val) => sum + val, 0) / values.length; return Math.sqrt(variance); } // ============= EXPORTS ============= module.exports = { analyzeContentComplexity, calculateReadabilityScore, preserveKeywords, validateSimulationQuality, generateQualityReport, calculateStatistics, calculateStandardDeviation, countSyllables, extractKeywords, calculateSimilarity, determineQualityIssue, QUALITY_THRESHOLDS, CRITICAL_KEYWORDS };