seo-generator-server/lib/post-processing/TransitionHumanization.js

526 lines
16 KiB
JavaScript

// ========================================
// PATTERN BREAKING - TECHNIQUE 3: TRANSITION HUMANIZATION
// Responsabilité: Remplacer connecteurs mécaniques par transitions naturelles
// Anti-détection: Éviter patterns de liaison typiques des LLMs
// ========================================
const { logSh } = require('../ErrorReporting');
const { tracer } = require('../trace');
/**
* DICTIONNAIRE CONNECTEURS HUMANISÉS
* Connecteurs LLM → Alternatives naturelles par contexte
*/
const TRANSITION_REPLACEMENTS = {
// Connecteurs trop formels → versions naturelles
'par ailleurs': {
alternatives: ['d\'ailleurs', 'au fait', 'soit dit en passant', 'à propos', 'sinon'],
weight: 0.8,
contexts: ['casual', 'conversational']
},
'en effet': {
alternatives: ['effectivement', 'c\'est vrai', 'tout à fait', 'absolument', 'exactement'],
weight: 0.9,
contexts: ['confirmative', 'agreement']
},
'de plus': {
alternatives: ['aussi', 'également', 'qui plus est', 'en plus', 'et puis'],
weight: 0.7,
contexts: ['additive', 'continuation']
},
'cependant': {
alternatives: ['mais', 'pourtant', 'néanmoins', 'malgré tout', 'quand même'],
weight: 0.6,
contexts: ['contrast', 'opposition']
},
'ainsi': {
alternatives: ['donc', 'du coup', 'comme ça', 'par conséquent', 'résultat'],
weight: 0.8,
contexts: ['consequence', 'result']
},
'donc': {
alternatives: ['du coup', 'alors', 'par conséquent', 'ainsi', 'résultat'],
weight: 0.5,
contexts: ['consequence', 'logical']
},
// Connecteurs de séquence
'ensuite': {
alternatives: ['puis', 'après', 'et puis', 'alors', 'du coup'],
weight: 0.6,
contexts: ['sequence', 'temporal']
},
'puis': {
alternatives: ['ensuite', 'après', 'et puis', 'alors'],
weight: 0.4,
contexts: ['sequence', 'temporal']
},
// Connecteurs d'emphase
'également': {
alternatives: ['aussi', 'de même', 'pareillement', 'en plus'],
weight: 0.6,
contexts: ['similarity', 'addition']
},
'aussi': {
alternatives: ['également', 'de même', 'en plus', 'pareillement'],
weight: 0.3,
contexts: ['similarity', 'addition']
},
// Connecteurs de conclusion
'enfin': {
alternatives: ['finalement', 'au final', 'pour finir', 'en dernier'],
weight: 0.5,
contexts: ['conclusion', 'final']
},
'finalement': {
alternatives: ['au final', 'en fin de compte', 'pour finir', 'enfin'],
weight: 0.4,
contexts: ['conclusion', 'final']
}
};
/**
* PATTERNS DE TRANSITION NATURELLE
* Selon le style de personnalité
*/
const PERSONALITY_TRANSITIONS = {
'décontracté': {
preferred: ['du coup', 'alors', 'bon', 'après', 'sinon'],
avoided: ['par conséquent', 'néanmoins', 'toutefois']
},
'technique': {
preferred: ['donc', 'ainsi', 'par conséquent', 'résultat'],
avoided: ['du coup', 'bon', 'franchement']
},
'commercial': {
preferred: ['aussi', 'de plus', 'également', 'qui plus est'],
avoided: ['du coup', 'bon', 'franchement']
},
'familier': {
preferred: ['du coup', 'bon', 'alors', 'après', 'franchement'],
avoided: ['par conséquent', 'néanmoins', 'de surcroît']
}
};
/**
* MAIN ENTRY POINT - HUMANISATION TRANSITIONS
* @param {Object} input - { content: {}, config: {}, context: {} }
* @returns {Object} - { content: {}, stats: {}, debug: {} }
*/
async function humanizeTransitions(input) {
return await tracer.run('TransitionHumanization.humanizeTransitions()', async () => {
const { content, config = {}, context = {} } = input;
const {
intensity = 0.6, // Probabilité de remplacement (60%)
personalityStyle = null, // Style de personnalité pour guidage
avoidRepetition = true, // Éviter répétitions excessives
preserveFormal = false, // Préserver style formel
csvData = null // Données pour personnalité
} = config;
await tracer.annotate({
technique: 'transition_humanization',
intensity,
personalityStyle: personalityStyle || csvData?.personality?.style,
elementsCount: Object.keys(content).length
});
const startTime = Date.now();
logSh(`🔗 TECHNIQUE 3/3: Humanisation transitions (intensité: ${intensity})`, 'INFO');
logSh(` 📊 ${Object.keys(content).length} éléments à humaniser`, 'DEBUG');
try {
const results = {};
let totalProcessed = 0;
let totalReplacements = 0;
let humanizationDetails = [];
// Extraire style de personnalité
const effectivePersonalityStyle = personalityStyle || csvData?.personality?.style || 'neutral';
// Analyser patterns globaux pour éviter répétitions
const globalPatterns = analyzeGlobalTransitionPatterns(content);
// Traiter chaque élément de contenu
for (const [tag, text] of Object.entries(content)) {
totalProcessed++;
if (text.length < 30) {
results[tag] = text;
continue;
}
// Appliquer humanisation des transitions
const humanizationResult = humanizeTextTransitions(text, {
intensity,
personalityStyle: effectivePersonalityStyle,
avoidRepetition,
preserveFormal,
globalPatterns,
tag
});
results[tag] = humanizationResult.text;
if (humanizationResult.replacements.length > 0) {
totalReplacements += humanizationResult.replacements.length;
humanizationDetails.push({
tag,
replacements: humanizationResult.replacements,
transitionsDetected: humanizationResult.transitionsFound
});
logSh(` 🔄 [${tag}]: ${humanizationResult.replacements.length} transitions humanisées`, 'DEBUG');
} else {
logSh(` ➡️ [${tag}]: Transitions déjà naturelles`, 'DEBUG');
}
}
const duration = Date.now() - startTime;
const stats = {
processed: totalProcessed,
totalReplacements,
avgReplacementsPerElement: Math.round(totalReplacements / totalProcessed * 100) / 100,
elementsWithTransitions: humanizationDetails.length,
personalityStyle: effectivePersonalityStyle,
duration,
technique: 'transition_humanization'
};
logSh(`✅ HUMANISATION TRANSITIONS: ${stats.totalReplacements} remplacements sur ${stats.elementsWithTransitions}/${stats.processed} éléments en ${duration}ms`, 'INFO');
await tracer.event('Transition humanization terminée', stats);
return {
content: results,
stats,
debug: {
technique: 'transition_humanization',
config: { intensity, personalityStyle: effectivePersonalityStyle, avoidRepetition },
humanizations: humanizationDetails,
globalPatterns
}
};
} catch (error) {
const duration = Date.now() - startTime;
logSh(`❌ HUMANISATION TRANSITIONS échouée après ${duration}ms: ${error.message}`, 'ERROR');
throw new Error(`TransitionHumanization failed: ${error.message}`);
}
}, input);
}
/**
* Humaniser les transitions d'un texte
*/
function humanizeTextTransitions(text, config) {
const { intensity, personalityStyle, avoidRepetition, preserveFormal, globalPatterns, tag } = config;
let humanizedText = text;
const replacements = [];
const transitionsFound = [];
// Statistiques usage pour éviter répétitions
const usageStats = {};
// Traiter chaque connecteur du dictionnaire
for (const [transition, transitionData] of Object.entries(TRANSITION_REPLACEMENTS)) {
const { alternatives, weight, contexts } = transitionData;
// Rechercher occurrences (insensible à la casse, mais préserver limites mots)
const regex = new RegExp(`\\b${escapeRegex(transition)}\\b`, 'gi');
const matches = [...text.matchAll(regex)];
if (matches.length > 0) {
transitionsFound.push(transition);
// Décider si on remplace selon intensité et poids
const shouldReplace = Math.random() < (intensity * weight);
if (shouldReplace && !preserveFormal) {
// Sélectionner meilleure alternative
const selectedAlternative = selectBestTransitionAlternative(
alternatives,
personalityStyle,
usageStats,
avoidRepetition
);
// Appliquer remplacement en préservant la casse
humanizedText = humanizedText.replace(regex, (match) => {
return preserveCase(match, selectedAlternative);
});
// Enregistrer usage
usageStats[selectedAlternative] = (usageStats[selectedAlternative] || 0) + matches.length;
replacements.push({
original: transition,
replacement: selectedAlternative,
occurrences: matches.length,
contexts,
personalityMatch: isPersonalityAppropriate(selectedAlternative, personalityStyle)
});
}
}
}
// Post-processing : éviter accumulations
if (avoidRepetition) {
const repetitionCleaned = reduceTransitionRepetition(humanizedText, usageStats);
humanizedText = repetitionCleaned.text;
replacements.push(...repetitionCleaned.additionalChanges);
}
return {
text: humanizedText,
replacements,
transitionsFound
};
}
/**
* Sélectionner meilleure alternative de transition
*/
function selectBestTransitionAlternative(alternatives, personalityStyle, usageStats, avoidRepetition) {
// Filtrer selon personnalité
const personalityFiltered = alternatives.filter(alt =>
isPersonalityAppropriate(alt, personalityStyle)
);
const candidateList = personalityFiltered.length > 0 ? personalityFiltered : alternatives;
if (!avoidRepetition) {
return candidateList[Math.floor(Math.random() * candidateList.length)];
}
// Éviter les alternatives déjà trop utilisées
const lessUsedAlternatives = candidateList.filter(alt =>
(usageStats[alt] || 0) < 2
);
const finalList = lessUsedAlternatives.length > 0 ? lessUsedAlternatives : candidateList;
return finalList[Math.floor(Math.random() * finalList.length)];
}
/**
* Vérifier si alternative appropriée pour personnalité
*/
function isPersonalityAppropriate(alternative, personalityStyle) {
if (!personalityStyle || personalityStyle === 'neutral') return true;
const styleMapping = {
'décontracté': PERSONALITY_TRANSITIONS.décontracté,
'technique': PERSONALITY_TRANSITIONS.technique,
'commercial': PERSONALITY_TRANSITIONS.commercial,
'familier': PERSONALITY_TRANSITIONS.familier
};
const styleConfig = styleMapping[personalityStyle.toLowerCase()];
if (!styleConfig) return true;
// Éviter les connecteurs inappropriés
if (styleConfig.avoided.includes(alternative)) return false;
// Privilégier les connecteurs préférés
if (styleConfig.preferred.includes(alternative)) return true;
return true;
}
/**
* Réduire répétitions excessives de transitions
*/
function reduceTransitionRepetition(text, usageStats) {
let processedText = text;
const additionalChanges = [];
// Identifier connecteurs surutilisés (>3 fois)
const overusedTransitions = Object.entries(usageStats)
.filter(([transition, count]) => count > 3)
.map(([transition]) => transition);
for (const overusedTransition of overusedTransitions) {
// Remplacer quelques occurrences par des alternatives
const regex = new RegExp(`\\b${escapeRegex(overusedTransition)}\\b`, 'g');
let replacements = 0;
processedText = processedText.replace(regex, (match, offset) => {
// Remplacer 1 occurrence sur 3 environ
if (Math.random() < 0.33 && replacements < 2) {
replacements++;
const alternatives = findAlternativesFor(overusedTransition);
const alternative = alternatives[Math.floor(Math.random() * alternatives.length)];
additionalChanges.push({
type: 'repetition_reduction',
original: overusedTransition,
replacement: alternative,
reason: 'overuse'
});
return preserveCase(match, alternative);
}
return match;
});
}
return { text: processedText, additionalChanges };
}
/**
* Trouver alternatives pour un connecteur donné
*/
function findAlternativesFor(transition) {
// Chercher dans le dictionnaire
for (const [key, data] of Object.entries(TRANSITION_REPLACEMENTS)) {
if (data.alternatives.includes(transition)) {
return data.alternatives.filter(alt => alt !== transition);
}
}
// Alternatives génériques
const genericAlternatives = {
'du coup': ['alors', 'donc', 'ainsi'],
'alors': ['du coup', 'donc', 'ensuite'],
'donc': ['du coup', 'alors', 'ainsi'],
'aussi': ['également', 'de plus', 'en plus'],
'mais': ['cependant', 'pourtant', 'néanmoins']
};
return genericAlternatives[transition] || ['donc', 'alors'];
}
/**
* Analyser patterns globaux de transitions
*/
function analyzeGlobalTransitionPatterns(content) {
const allText = Object.values(content).join(' ');
const transitionCounts = {};
const repetitionPatterns = [];
// Compter occurrences globales
for (const transition of Object.keys(TRANSITION_REPLACEMENTS)) {
const regex = new RegExp(`\\b${escapeRegex(transition)}\\b`, 'gi');
const matches = allText.match(regex);
if (matches) {
transitionCounts[transition] = matches.length;
}
}
// Identifier patterns de répétition problématiques
const sortedTransitions = Object.entries(transitionCounts)
.sort(([,a], [,b]) => b - a)
.slice(0, 5); // Top 5 plus utilisées
sortedTransitions.forEach(([transition, count]) => {
if (count > 5) {
repetitionPatterns.push({
transition,
count,
severity: count > 10 ? 'high' : count > 7 ? 'medium' : 'low'
});
}
});
return {
transitionCounts,
repetitionPatterns,
diversityScore: Object.keys(transitionCounts).length / Math.max(1, Object.values(transitionCounts).reduce((a,b) => a+b, 0))
};
}
/**
* Préserver la casse originale
*/
function preserveCase(original, replacement) {
if (original === original.toUpperCase()) {
return replacement.toUpperCase();
} else if (original[0] === original[0].toUpperCase()) {
return replacement.charAt(0).toUpperCase() + replacement.slice(1).toLowerCase();
} else {
return replacement.toLowerCase();
}
}
/**
* Échapper caractères regex
*/
function escapeRegex(text) {
return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
* Analyser qualité des transitions d'un texte
*/
function analyzeTransitionQuality(text) {
const sentences = text.split(/[.!?]+/).filter(s => s.trim().length > 5);
if (sentences.length < 2) {
return { score: 100, issues: [], naturalness: 'high' };
}
let mechanicalTransitions = 0;
let totalTransitions = 0;
const issues = [];
// Analyser chaque transition
sentences.forEach((sentence, index) => {
if (index === 0) return;
const trimmed = sentence.trim();
const startsWithTransition = Object.keys(TRANSITION_REPLACEMENTS).some(transition =>
trimmed.toLowerCase().startsWith(transition.toLowerCase())
);
if (startsWithTransition) {
totalTransitions++;
// Vérifier si transition mécanique
const transition = Object.keys(TRANSITION_REPLACEMENTS).find(t =>
trimmed.toLowerCase().startsWith(t.toLowerCase())
);
if (transition && TRANSITION_REPLACEMENTS[transition].weight > 0.7) {
mechanicalTransitions++;
issues.push({
type: 'mechanical_transition',
transition,
suggestion: TRANSITION_REPLACEMENTS[transition].alternatives[0]
});
}
}
});
const mechanicalRatio = totalTransitions > 0 ? mechanicalTransitions / totalTransitions : 0;
const score = Math.max(0, 100 - (mechanicalRatio * 100));
let naturalness = 'high';
if (mechanicalRatio > 0.5) naturalness = 'low';
else if (mechanicalRatio > 0.25) naturalness = 'medium';
return { score: Math.round(score), issues, naturalness, mechanicalRatio };
}
module.exports = {
humanizeTransitions, // ← MAIN ENTRY POINT
humanizeTextTransitions,
analyzeTransitionQuality,
analyzeGlobalTransitionPatterns,
TRANSITION_REPLACEMENTS,
PERSONALITY_TRANSITIONS
};