seo-generator-server/lib/post-processing/PatternBreaking.js

485 lines
16 KiB
JavaScript

// ========================================
// ORCHESTRATEUR PATTERN BREAKING - NIVEAU 2
// Responsabilité: Coordonner les 3 techniques anti-détection
// Objectif: -20% détection IA vs Niveau 1
// ========================================
const { logSh } = require('../ErrorReporting');
const { tracer } = require('../trace');
// Import des 3 techniques Pattern Breaking
const { applySentenceVariation } = require('./SentenceVariation');
const { removeLLMFingerprints } = require('./LLMFingerprintRemoval');
const { humanizeTransitions } = require('./TransitionHumanization');
/**
* MAIN ENTRY POINT - PATTERN BREAKING COMPLET
* @param {Object} input - { content: {}, csvData: {}, options: {} }
* @returns {Object} - { content: {}, stats: {}, debug: {} }
*/
async function applyPatternBreaking(input) {
return await tracer.run('PatternBreaking.applyPatternBreaking()', async () => {
const { content, csvData, options = {} } = input;
const config = {
// Configuration globale
intensity = 0.6, // Intensité générale (60%)
// Contrôle par technique
sentenceVariation: true, // Activer variation phrases
fingerprintRemoval: true, // Activer suppression empreintes
transitionHumanization: true, // Activer humanisation transitions
// Configuration spécifique par technique
sentenceVariationConfig: {
intensity: 0.3,
splitThreshold: 100,
mergeThreshold: 30,
preserveQuestions: true,
preserveTitles: true
},
fingerprintRemovalConfig: {
intensity: 1.0,
preserveKeywords: true,
contextualMode: true,
csvData
},
transitionHumanizationConfig: {
intensity: 0.6,
personalityStyle: csvData?.personality?.style,
avoidRepetition: true,
preserveFormal: false,
csvData
},
// Options avancées
qualityPreservation: true, // Préserver qualité contenu
seoIntegrity: true, // Maintenir intégrité SEO
readabilityCheck: true, // Vérifier lisibilité
...options // Override avec options fournies
};
await tracer.annotate({
level: 2,
technique: 'pattern_breaking',
elementsCount: Object.keys(content).length,
personality: csvData?.personality?.nom,
config: {
sentenceVariation: config.sentenceVariation,
fingerprintRemoval: config.fingerprintRemoval,
transitionHumanization: config.transitionHumanization,
intensity: config.intensity
}
});
const startTime = Date.now();
logSh(`🎯 NIVEAU 2: PATTERN BREAKING (3 techniques)`, 'INFO');
logSh(` 🎭 Personnalité: ${csvData?.personality?.nom} (${csvData?.personality?.style})`, 'INFO');
logSh(` 📊 ${Object.keys(content).length} éléments à traiter`, 'INFO');
logSh(` ⚙️ Techniques actives: ${[config.sentenceVariation && 'Variation', config.fingerprintRemoval && 'Empreintes', config.transitionHumanization && 'Transitions'].filter(Boolean).join(' + ')}`, 'INFO');
try {
let currentContent = { ...content };
const pipelineStats = {
techniques: [],
totalDuration: 0,
qualityMetrics: {}
};
// Analyse initiale de qualité
if (config.qualityPreservation) {
pipelineStats.qualityMetrics.initial = analyzeContentQuality(currentContent);
}
// TECHNIQUE 1: VARIATION LONGUEUR PHRASES
if (config.sentenceVariation) {
const step1Result = await applySentenceVariation({
content: currentContent,
config: config.sentenceVariationConfig,
context: { step: 1, totalSteps: 3 }
});
currentContent = step1Result.content;
pipelineStats.techniques.push({
name: 'SentenceVariation',
...step1Result.stats,
qualityImpact: calculateQualityImpact(content, step1Result.content)
});
logSh(` ✅ 1/3: Variation phrases - ${step1Result.stats.modified}/${step1Result.stats.processed} éléments`, 'INFO');
}
// TECHNIQUE 2: SUPPRESSION EMPREINTES LLM
if (config.fingerprintRemoval) {
const step2Result = await removeLLMFingerprints({
content: currentContent,
config: config.fingerprintRemovalConfig,
context: { step: 2, totalSteps: 3 }
});
currentContent = step2Result.content;
pipelineStats.techniques.push({
name: 'FingerprintRemoval',
...step2Result.stats,
qualityImpact: calculateQualityImpact(content, step2Result.content)
});
logSh(` ✅ 2/3: Suppression empreintes - ${step2Result.stats.totalReplacements} remplacements`, 'INFO');
}
// TECHNIQUE 3: HUMANISATION TRANSITIONS
if (config.transitionHumanization) {
const step3Result = await humanizeTransitions({
content: currentContent,
config: config.transitionHumanizationConfig,
context: { step: 3, totalSteps: 3 }
});
currentContent = step3Result.content;
pipelineStats.techniques.push({
name: 'TransitionHumanization',
...step3Result.stats,
qualityImpact: calculateQualityImpact(content, step3Result.content)
});
logSh(` ✅ 3/3: Humanisation transitions - ${step3Result.stats.totalReplacements} améliorations`, 'INFO');
}
// POST-PROCESSING: Vérifications qualité
if (config.qualityPreservation || config.readabilityCheck) {
const qualityCheck = performQualityChecks(content, currentContent, config);
pipelineStats.qualityMetrics.final = qualityCheck;
// Rollback si qualité trop dégradée
if (qualityCheck.shouldRollback) {
logSh(`⚠️ ROLLBACK: Qualité dégradée, retour contenu original`, 'WARNING');
currentContent = content;
pipelineStats.rollback = true;
}
}
// RÉSULTATS FINAUX
const totalDuration = Date.now() - startTime;
pipelineStats.totalDuration = totalDuration;
const totalModifications = pipelineStats.techniques.reduce((sum, tech) => {
return sum + (tech.modified || tech.totalReplacements || 0);
}, 0);
const stats = {
level: 2,
technique: 'pattern_breaking',
processed: Object.keys(content).length,
totalModifications,
techniquesUsed: pipelineStats.techniques.length,
duration: totalDuration,
techniques: pipelineStats.techniques,
qualityPreserved: !pipelineStats.rollback,
rollback: pipelineStats.rollback || false
};
logSh(`🎯 NIVEAU 2 TERMINÉ: ${totalModifications} modifications sur ${stats.processed} éléments (${totalDuration}ms)`, 'INFO');
// Log détaillé par technique
pipelineStats.techniques.forEach(tech => {
const modificationsCount = tech.modified || tech.totalReplacements || 0;
logSh(`${tech.name}: ${modificationsCount} modifications (${tech.duration}ms)`, 'DEBUG');
});
await tracer.event('Pattern breaking terminé', stats);
return {
content: currentContent,
stats,
debug: {
level: 2,
technique: 'pattern_breaking',
config,
pipeline: pipelineStats,
qualityMetrics: pipelineStats.qualityMetrics
}
};
} catch (error) {
const totalDuration = Date.now() - startTime;
logSh(`❌ NIVEAU 2 ÉCHOUÉ après ${totalDuration}ms: ${error.message}`, 'ERROR');
// Fallback: retourner contenu original
logSh(`🔄 Fallback: contenu original conservé`, 'WARNING');
await tracer.event('Pattern breaking échoué', {
error: error.message,
duration: totalDuration,
fallback: true
});
return {
content,
stats: {
level: 2,
technique: 'pattern_breaking',
processed: Object.keys(content).length,
totalModifications: 0,
duration: totalDuration,
error: error.message,
fallback: true
},
debug: { error: error.message, fallback: true }
};
}
}, input);
}
/**
* MODE DIAGNOSTIC - Test individuel des techniques
*/
async function diagnosticPatternBreaking(content, csvData) {
logSh(`🔬 DIAGNOSTIC NIVEAU 2: Test individuel des techniques`, 'INFO');
const diagnostics = {
techniques: [],
errors: [],
performance: {},
recommendations: []
};
const techniques = [
{ name: 'SentenceVariation', func: applySentenceVariation },
{ name: 'FingerprintRemoval', func: removeLLMFingerprints },
{ name: 'TransitionHumanization', func: humanizeTransitions }
];
for (const technique of techniques) {
try {
const startTime = Date.now();
const result = await technique.func({
content,
config: { csvData },
context: { diagnostic: true }
});
diagnostics.techniques.push({
name: technique.name,
success: true,
duration: Date.now() - startTime,
stats: result.stats,
effectivenessScore: calculateEffectivenessScore(result.stats)
});
} catch (error) {
diagnostics.errors.push({
technique: technique.name,
error: error.message
});
diagnostics.techniques.push({
name: technique.name,
success: false,
error: error.message
});
}
}
// Générer recommandations
diagnostics.recommendations = generateRecommendations(diagnostics.techniques);
const successfulTechniques = diagnostics.techniques.filter(t => t.success);
diagnostics.performance.totalDuration = diagnostics.techniques.reduce((sum, t) => sum + (t.duration || 0), 0);
diagnostics.performance.successRate = Math.round((successfulTechniques.length / techniques.length) * 100);
logSh(`🔬 DIAGNOSTIC TERMINÉ: ${successfulTechniques.length}/${techniques.length} techniques opérationnelles`, 'INFO');
return diagnostics;
}
/**
* Analyser qualité du contenu
*/
function analyzeContentQuality(content) {
const allText = Object.values(content).join(' ');
const wordCount = allText.split(/\s+/).length;
const avgWordsPerElement = wordCount / Object.keys(content).length;
// Métrique de lisibilité approximative (Flesch simplifié)
const sentences = allText.split(/[.!?]+/).filter(s => s.trim().length > 5);
const avgWordsPerSentence = wordCount / Math.max(1, sentences.length);
const readabilityScore = Math.max(0, 100 - (avgWordsPerSentence * 1.5));
return {
wordCount,
elementCount: Object.keys(content).length,
avgWordsPerElement: Math.round(avgWordsPerElement),
avgWordsPerSentence: Math.round(avgWordsPerSentence),
readabilityScore: Math.round(readabilityScore),
sentenceCount: sentences.length
};
}
/**
* Calculer impact qualité entre avant/après
*/
function calculateQualityImpact(originalContent, modifiedContent) {
const originalQuality = analyzeContentQuality(originalContent);
const modifiedQuality = analyzeContentQuality(modifiedContent);
const wordCountChange = ((modifiedQuality.wordCount - originalQuality.wordCount) / originalQuality.wordCount) * 100;
const readabilityChange = modifiedQuality.readabilityScore - originalQuality.readabilityScore;
return {
wordCountChange: Math.round(wordCountChange * 100) / 100,
readabilityChange: Math.round(readabilityChange),
severe: Math.abs(wordCountChange) > 10 || Math.abs(readabilityChange) > 15
};
}
/**
* Effectuer vérifications qualité
*/
function performQualityChecks(originalContent, modifiedContent, config) {
const originalQuality = analyzeContentQuality(originalContent);
const modifiedQuality = analyzeContentQuality(modifiedContent);
const qualityThresholds = {
maxWordCountChange: 15, // % max changement nombre mots
minReadabilityScore: 50, // Score lisibilité minimum
maxReadabilityDrop: 20 // Baisse max lisibilité
};
const issues = [];
// Vérification nombre de mots
const wordCountChange = Math.abs(modifiedQuality.wordCount - originalQuality.wordCount) / originalQuality.wordCount * 100;
if (wordCountChange > qualityThresholds.maxWordCountChange) {
issues.push({
type: 'word_count_change',
severity: 'high',
change: wordCountChange,
threshold: qualityThresholds.maxWordCountChange
});
}
// Vérification lisibilité
if (modifiedQuality.readabilityScore < qualityThresholds.minReadabilityScore) {
issues.push({
type: 'low_readability',
severity: 'medium',
score: modifiedQuality.readabilityScore,
threshold: qualityThresholds.minReadabilityScore
});
}
const readabilityDrop = originalQuality.readabilityScore - modifiedQuality.readabilityScore;
if (readabilityDrop > qualityThresholds.maxReadabilityDrop) {
issues.push({
type: 'readability_drop',
severity: 'high',
drop: readabilityDrop,
threshold: qualityThresholds.maxReadabilityDrop
});
}
// Décision rollback
const highSeverityIssues = issues.filter(issue => issue.severity === 'high');
const shouldRollback = highSeverityIssues.length > 0 && config.qualityPreservation;
return {
originalQuality,
modifiedQuality,
issues,
shouldRollback,
qualityScore: calculateOverallQualityScore(issues, modifiedQuality)
};
}
/**
* Calculer score de qualité global
*/
function calculateOverallQualityScore(issues, quality) {
let baseScore = 100;
issues.forEach(issue => {
const penalty = issue.severity === 'high' ? 30 : issue.severity === 'medium' ? 15 : 5;
baseScore -= penalty;
});
// Bonus pour bonne lisibilité
if (quality.readabilityScore > 70) baseScore += 10;
return Math.max(0, Math.min(100, baseScore));
}
/**
* Calculer score d'efficacité d'une technique
*/
function calculateEffectivenessScore(stats) {
if (!stats) return 0;
const modificationsCount = stats.modified || stats.totalReplacements || 0;
const processedCount = stats.processed || 1;
const modificationRate = (modificationsCount / processedCount) * 100;
// Score basé sur taux de modification et durée
const baseScore = Math.min(100, modificationRate * 2); // Max 50% modification = score 100
const durationPenalty = Math.max(0, (stats.duration - 1000) / 100); // Pénalité si > 1s
return Math.max(0, Math.round(baseScore - durationPenalty));
}
/**
* Générer recommandations basées sur diagnostic
*/
function generateRecommendations(techniqueResults) {
const recommendations = [];
techniqueResults.forEach(tech => {
if (!tech.success) {
recommendations.push({
type: 'error',
technique: tech.name,
message: `${tech.name} a échoué: ${tech.error}`,
action: 'Vérifier configuration et dépendances'
});
return;
}
const effectiveness = tech.effectivenessScore || 0;
if (effectiveness < 30) {
recommendations.push({
type: 'low_effectiveness',
technique: tech.name,
message: `${tech.name} peu efficace (score: ${effectiveness})`,
action: 'Augmenter intensité ou réviser configuration'
});
} else if (effectiveness > 80) {
recommendations.push({
type: 'high_effectiveness',
technique: tech.name,
message: `${tech.name} très efficace (score: ${effectiveness})`,
action: 'Configuration optimale'
});
}
if (tech.duration > 3000) {
recommendations.push({
type: 'performance',
technique: tech.name,
message: `${tech.name} lent (${tech.duration}ms)`,
action: 'Considérer réduction intensité ou optimisation'
});
}
});
return recommendations;
}
module.exports = {
applyPatternBreaking, // ← MAIN ENTRY POINT
diagnosticPatternBreaking, // ← Mode diagnostic
analyzeContentQuality,
performQualityChecks,
calculateQualityImpact,
calculateEffectivenessScore
};