seo-generator-server/lib/selective-enhancement/SelectiveUtils.js

579 lines
18 KiB
JavaScript

// ========================================
// SELECTIVE UTILS - UTILITAIRES MODULAIRES
// Responsabilité: Fonctions utilitaires partagées par tous les modules selective
// Architecture: Helper functions réutilisables et composables
// ========================================
const { logSh } = require('../ErrorReporting');
/**
* ANALYSEURS DE CONTENU SELECTIVE
*/
/**
* Analyser qualité technique d'un contenu
*/
function analyzeTechnicalQuality(content, contextualTerms = []) {
if (!content || typeof content !== 'string') return { score: 0, details: {} };
const analysis = {
score: 0,
details: {
technicalTermsFound: 0,
technicalTermsExpected: contextualTerms.length,
genericWordsCount: 0,
hasSpecifications: false,
hasDimensions: false,
contextIntegration: 0
}
};
const lowerContent = content.toLowerCase();
// 1. Compter termes techniques présents
contextualTerms.forEach(term => {
if (lowerContent.includes(term.toLowerCase())) {
analysis.details.technicalTermsFound++;
}
});
// 2. Détecter mots génériques
const genericWords = ['produit', 'solution', 'service', 'offre', 'article', 'élément'];
analysis.details.genericWordsCount = genericWords.filter(word =>
lowerContent.includes(word)
).length;
// 3. Vérifier spécifications techniques
analysis.details.hasSpecifications = /\b(norme|iso|din|ce)\b/i.test(content);
// 4. Vérifier dimensions/données techniques
analysis.details.hasDimensions = /\d+\s*(mm|cm|m|%|°|kg|g)\b/i.test(content);
// 5. Calculer score global (0-100)
const termRatio = contextualTerms.length > 0 ?
(analysis.details.technicalTermsFound / contextualTerms.length) * 40 : 20;
const genericPenalty = Math.min(20, analysis.details.genericWordsCount * 5);
const specificationBonus = analysis.details.hasSpecifications ? 15 : 0;
const dimensionBonus = analysis.details.hasDimensions ? 15 : 0;
const lengthBonus = content.length > 100 ? 10 : 0;
analysis.score = Math.max(0, Math.min(100,
termRatio + specificationBonus + dimensionBonus + lengthBonus - genericPenalty
));
return analysis;
}
/**
* Analyser fluidité des transitions
*/
function analyzeTransitionFluidity(content) {
if (!content || typeof content !== 'string') return { score: 0, details: {} };
const sentences = content.split(/[.!?]+/)
.map(s => s.trim())
.filter(s => s.length > 5);
if (sentences.length < 2) {
return { score: 100, details: { reason: 'Contenu trop court pour analyse transitions' } };
}
const analysis = {
score: 0,
details: {
sentencesCount: sentences.length,
connectorsFound: 0,
repetitiveConnectors: 0,
abruptTransitions: 0,
averageSentenceLength: 0,
lengthVariation: 0
}
};
// 1. Analyser connecteurs
const commonConnectors = ['par ailleurs', 'en effet', 'de plus', 'cependant', 'ainsi', 'donc', 'ensuite'];
const connectorCounts = {};
commonConnectors.forEach(connector => {
const matches = (content.match(new RegExp(`\\b${connector}\\b`, 'gi')) || []);
connectorCounts[connector] = matches.length;
analysis.details.connectorsFound += matches.length;
if (matches.length > 1) analysis.details.repetitiveConnectors++;
});
// 2. Détecter transitions abruptes
for (let i = 1; i < sentences.length; i++) {
const sentence = sentences[i].toLowerCase().trim();
const hasConnector = commonConnectors.some(connector =>
sentence.startsWith(connector) || sentence.includes(` ${connector} `)
);
if (!hasConnector && sentence.length > 20) {
analysis.details.abruptTransitions++;
}
}
// 3. Analyser variation de longueur
const lengths = sentences.map(s => s.split(/\s+/).length);
analysis.details.averageSentenceLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
const variance = lengths.reduce((acc, len) =>
acc + Math.pow(len - analysis.details.averageSentenceLength, 2), 0
) / lengths.length;
analysis.details.lengthVariation = Math.sqrt(variance);
// 4. Calculer score fluidité (0-100)
const connectorScore = Math.min(30, (analysis.details.connectorsFound / sentences.length) * 100);
const repetitionPenalty = Math.min(20, analysis.details.repetitiveConnectors * 5);
const abruptPenalty = Math.min(30, (analysis.details.abruptTransitions / sentences.length) * 50);
const variationScore = Math.min(20, analysis.details.lengthVariation * 2);
analysis.score = Math.max(0, Math.min(100,
connectorScore + variationScore - repetitionPenalty - abruptPenalty + 50
));
return analysis;
}
/**
* Analyser cohérence de style
*/
function analyzeStyleConsistency(content, expectedPersonality = null) {
if (!content || typeof content !== 'string') return { score: 0, details: {} };
const analysis = {
score: 0,
details: {
personalityAlignment: 0,
toneConsistency: 0,
vocabularyLevel: 'standard',
formalityScore: 0,
personalityWordsFound: 0
}
};
// 1. Analyser alignement personnalité
if (expectedPersonality && expectedPersonality.vocabulairePref) {
const personalityWords = expectedPersonality.vocabulairePref.toLowerCase().split(',');
const contentLower = content.toLowerCase();
personalityWords.forEach(word => {
if (word.trim() && contentLower.includes(word.trim())) {
analysis.details.personalityWordsFound++;
}
});
analysis.details.personalityAlignment = personalityWords.length > 0 ?
(analysis.details.personalityWordsFound / personalityWords.length) * 100 : 0;
}
// 2. Analyser niveau vocabulaire
const technicalWords = content.match(/\b\w{8,}\b/g) || [];
const totalWords = content.split(/\s+/).length;
const techRatio = technicalWords.length / totalWords;
if (techRatio > 0.15) analysis.details.vocabularyLevel = 'expert';
else if (techRatio < 0.05) analysis.details.vocabularyLevel = 'accessible';
else analysis.details.vocabularyLevel = 'standard';
// 3. Analyser formalité
const formalIndicators = ['il convient de', 'par conséquent', 'néanmoins', 'toutefois'];
const casualIndicators = ['du coup', 'sympa', 'cool', 'nickel'];
let formalCount = formalIndicators.filter(indicator =>
content.toLowerCase().includes(indicator)
).length;
let casualCount = casualIndicators.filter(indicator =>
content.toLowerCase().includes(indicator)
).length;
analysis.details.formalityScore = formalCount - casualCount; // Positif = formel, négatif = casual
// 4. Calculer score cohérence (0-100)
let baseScore = 50;
if (expectedPersonality) {
baseScore += analysis.details.personalityAlignment * 0.3;
// Ajustements selon niveau technique attendu
const expectedLevel = expectedPersonality.niveauTechnique || 'standard';
if (expectedLevel === analysis.details.vocabularyLevel) {
baseScore += 20;
} else {
baseScore -= 10;
}
}
// Bonus cohérence tonale
const sentences = content.split(/[.!?]+/).filter(s => s.length > 10);
if (sentences.length > 1) {
baseScore += Math.min(20, analysis.details.lengthVariation || 10);
}
analysis.score = Math.max(0, Math.min(100, baseScore));
return analysis;
}
/**
* COMPARATEURS ET MÉTRIQUES
*/
/**
* Comparer deux contenus et calculer taux amélioration
*/
function compareContentImprovement(original, enhanced, analysisType = 'general') {
if (!original || !enhanced) return { improvementRate: 0, details: {} };
const comparison = {
improvementRate: 0,
details: {
lengthChange: ((enhanced.length - original.length) / original.length) * 100,
wordCountChange: 0,
structuralChanges: 0,
contentPreserved: true
}
};
// 1. Analyser changements structurels
const originalSentences = original.split(/[.!?]+/).length;
const enhancedSentences = enhanced.split(/[.!?]+/).length;
comparison.details.structuralChanges = Math.abs(enhancedSentences - originalSentences);
// 2. Analyser changements de mots
const originalWords = original.toLowerCase().split(/\s+/).filter(w => w.length > 2);
const enhancedWords = enhanced.toLowerCase().split(/\s+/).filter(w => w.length > 2);
comparison.details.wordCountChange = enhancedWords.length - originalWords.length;
// 3. Vérifier préservation du contenu principal
const originalKeyWords = originalWords.filter(w => w.length > 4);
const preservedWords = originalKeyWords.filter(w => enhanced.toLowerCase().includes(w));
comparison.details.contentPreserved = (preservedWords.length / originalKeyWords.length) > 0.7;
// 4. Calculer taux amélioration selon type d'analyse
switch (analysisType) {
case 'technical':
const originalTech = analyzeTechnicalQuality(original);
const enhancedTech = analyzeTechnicalQuality(enhanced);
comparison.improvementRate = enhancedTech.score - originalTech.score;
break;
case 'transitions':
const originalFluid = analyzeTransitionFluidity(original);
const enhancedFluid = analyzeTransitionFluidity(enhanced);
comparison.improvementRate = enhancedFluid.score - originalFluid.score;
break;
case 'style':
const originalStyle = analyzeStyleConsistency(original);
const enhancedStyle = analyzeStyleConsistency(enhanced);
comparison.improvementRate = enhancedStyle.score - originalStyle.score;
break;
default:
// Amélioration générale (moyenne pondérée)
comparison.improvementRate = Math.min(50, Math.abs(comparison.details.lengthChange) * 0.1 +
(comparison.details.contentPreserved ? 20 : -20) +
Math.min(15, Math.abs(comparison.details.wordCountChange)));
}
return comparison;
}
/**
* UTILITAIRES DE CONTENU
*/
/**
* Nettoyer contenu généré par LLM
*/
function cleanGeneratedContent(content, cleaningLevel = 'standard') {
if (!content || typeof content !== 'string') return content;
let cleaned = content.trim();
// Nettoyage de base
cleaned = cleaned.replace(/^(voici\s+)?le\s+contenu\s+(amélioré|modifié|réécrit)[:\s]*/gi, '');
cleaned = cleaned.replace(/^(bon,?\s*)?(alors,?\s*)?(voici\s+)?/gi, '');
cleaned = cleaned.replace(/^(avec\s+les?\s+)?améliorations?\s*[:\s]*/gi, '');
// Nettoyage formatage
cleaned = cleaned.replace(/\*\*([^*]+)\*\*/g, '$1'); // Gras markdown → texte normal
cleaned = cleaned.replace(/\s{2,}/g, ' '); // Espaces multiples
cleaned = cleaned.replace(/([.!?])\s*([.!?])/g, '$1 '); // Double ponctuation
if (cleaningLevel === 'intensive') {
// Nettoyage intensif
cleaned = cleaned.replace(/^\s*[-*+]\s*/gm, ''); // Puces en début de ligne
cleaned = cleaned.replace(/^(pour\s+)?(ce\s+)?(contenu\s*)?[,:]?\s*/gi, '');
cleaned = cleaned.replace(/\([^)]*\)/g, ''); // Parenthèses et contenu
}
// Nettoyage final
cleaned = cleaned.replace(/^[,.\s]+/, ''); // Début
cleaned = cleaned.replace(/[,\s]+$/, ''); // Fin
cleaned = cleaned.trim();
return cleaned;
}
/**
* Valider contenu selective
*/
function validateSelectiveContent(content, originalContent, criteria = {}) {
const validation = {
isValid: true,
score: 0,
issues: [],
suggestions: []
};
const {
minLength = 20,
maxLengthChange = 50, // % de changement maximum
preserveContent = true,
checkTechnicalTerms = true
} = criteria;
// 1. Vérifier longueur
if (!content || content.length < minLength) {
validation.isValid = false;
validation.issues.push('Contenu trop court');
validation.suggestions.push('Augmenter la longueur du contenu généré');
} else {
validation.score += 25;
}
// 2. Vérifier changements de longueur
if (originalContent) {
const lengthChange = Math.abs((content.length - originalContent.length) / originalContent.length) * 100;
if (lengthChange > maxLengthChange) {
validation.issues.push('Changement de longueur excessif');
validation.suggestions.push('Réduire l\'intensité d\'amélioration');
} else {
validation.score += 25;
}
// 3. Vérifier préservation du contenu
if (preserveContent) {
const preservation = compareContentImprovement(originalContent, content);
if (!preservation.details.contentPreserved) {
validation.isValid = false;
validation.issues.push('Contenu original non préservé');
validation.suggestions.push('Améliorer conservation du sens original');
} else {
validation.score += 25;
}
}
}
// 4. Vérifications spécifiques
if (checkTechnicalTerms) {
const technicalQuality = analyzeTechnicalQuality(content);
if (technicalQuality.score > 60) {
validation.score += 25;
} else if (technicalQuality.score < 30) {
validation.issues.push('Qualité technique insuffisante');
validation.suggestions.push('Ajouter plus de termes techniques spécialisés');
}
}
// Score final et validation
validation.score = Math.min(100, validation.score);
validation.isValid = validation.isValid && validation.score >= 60;
return validation;
}
/**
* UTILITAIRES TECHNIQUES
*/
/**
* Chunk array avec gestion intelligente
*/
function chunkArray(array, chunkSize, smartChunking = false) {
if (!Array.isArray(array)) return [];
if (array.length <= chunkSize) return [array];
const chunks = [];
if (smartChunking) {
// Chunking intelligent : éviter de séparer éléments liés
let currentChunk = [];
for (let i = 0; i < array.length; i++) {
currentChunk.push(array[i]);
// Conditions de fin de chunk intelligente
const isChunkFull = currentChunk.length >= chunkSize;
const isLastElement = i === array.length - 1;
const nextElementRelated = i < array.length - 1 &&
array[i].tag && array[i + 1].tag &&
array[i].tag.includes('FAQ') && array[i + 1].tag.includes('FAQ');
if ((isChunkFull && !nextElementRelated) || isLastElement) {
chunks.push([...currentChunk]);
currentChunk = [];
}
}
// Ajouter chunk restant si non vide
if (currentChunk.length > 0) {
if (chunks.length > 0 && chunks[chunks.length - 1].length + currentChunk.length <= chunkSize * 1.2) {
// Merger avec dernier chunk si pas trop gros
chunks[chunks.length - 1].push(...currentChunk);
} else {
chunks.push(currentChunk);
}
}
} else {
// Chunking standard
for (let i = 0; i < array.length; i += chunkSize) {
chunks.push(array.slice(i, i + chunkSize));
}
}
return chunks;
}
/**
* Sleep avec logging optionnel
*/
async function sleep(ms, logMessage = null) {
if (logMessage) {
logSh(`${logMessage} (${ms}ms)`, 'DEBUG');
}
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
* Mesurer performance d'opération
*/
function measurePerformance(operationName, startTime = Date.now()) {
const endTime = Date.now();
const duration = endTime - startTime;
const performance = {
operationName,
startTime,
endTime,
duration,
durationFormatted: formatDuration(duration)
};
return performance;
}
/**
* Formater durée en format lisible
*/
function formatDuration(ms) {
if (ms < 1000) return `${ms}ms`;
if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
return `${Math.floor(ms / 60000)}m ${Math.floor((ms % 60000) / 1000)}s`;
}
/**
* STATISTIQUES ET RAPPORTS
*/
/**
* Générer rapport amélioration
*/
function generateImprovementReport(originalContent, enhancedContent, layerType = 'general') {
const report = {
layerType,
timestamp: new Date().toISOString(),
summary: {
elementsProcessed: 0,
elementsImproved: 0,
averageImprovement: 0,
totalExecutionTime: 0
},
details: {
byElement: [],
qualityMetrics: {},
recommendations: []
}
};
// Analyser chaque élément
Object.keys(originalContent).forEach(tag => {
const original = originalContent[tag];
const enhanced = enhancedContent[tag];
if (original && enhanced) {
report.summary.elementsProcessed++;
const improvement = compareContentImprovement(original, enhanced, layerType);
if (improvement.improvementRate > 0) {
report.summary.elementsImproved++;
}
report.summary.averageImprovement += improvement.improvementRate;
report.details.byElement.push({
tag,
improvementRate: improvement.improvementRate,
lengthChange: improvement.details.lengthChange,
contentPreserved: improvement.details.contentPreserved
});
}
});
// Calculer moyennes
if (report.summary.elementsProcessed > 0) {
report.summary.averageImprovement = report.summary.averageImprovement / report.summary.elementsProcessed;
}
// Métriques qualité globales
const fullOriginal = Object.values(originalContent).join(' ');
const fullEnhanced = Object.values(enhancedContent).join(' ');
report.details.qualityMetrics = {
technical: analyzeTechnicalQuality(fullEnhanced),
transitions: analyzeTransitionFluidity(fullEnhanced),
style: analyzeStyleConsistency(fullEnhanced)
};
// Recommandations
if (report.summary.averageImprovement < 10) {
report.details.recommendations.push('Augmenter l\'intensité d\'amélioration');
}
if (report.details.byElement.some(e => !e.contentPreserved)) {
report.details.recommendations.push('Améliorer préservation du contenu original');
}
return report;
}
module.exports = {
// Analyseurs
analyzeTechnicalQuality,
analyzeTransitionFluidity,
analyzeStyleConsistency,
// Comparateurs
compareContentImprovement,
// Utilitaires contenu
cleanGeneratedContent,
validateSelectiveContent,
// Utilitaires techniques
chunkArray,
sleep,
measurePerformance,
formatDuration,
// Rapports
generateImprovementReport
};