seo-generator-server/lib/pattern-breaking/SyntaxVariations.js

380 lines
12 KiB
JavaScript

// ========================================
// FICHIER: SyntaxVariations.js
// RESPONSABILITÉ: Variations syntaxiques pour casser patterns LLM
// Techniques: découpage, fusion, restructuration phrases
// ========================================
const { logSh } = require('../ErrorReporting');
/**
 * Syntactic patterns typical of LLM-generated French text that should be avoided.
 *
 * - `repetitiveStarts`: overly predictable sentence openers.
 * - `perfectStructures`: enumerations that read as too neatly organised.
 * - `uniformLengths`: plain flag, not a regex; sentence-length uniformity is
 *   meant to be detected dynamically rather than matched.
 *
 * Every regex carries the `g` flag, so `.test()` / `.exec()` on these shared
 * instances is stateful (`lastIndex`); reset it or prefer `String#match` /
 * `String#replace` when consuming them.
 */
const LLM_SYNTAX_PATTERNS = {
  // Openers that make a text sound formulaic
  repetitiveStarts: [
    /^Il est important de/gi,
    /^Il convient de/gi,
    /^Il faut noter que/gi,
    /^Dans ce contexte/gi,
    /^Par ailleurs/gi,
  ],

  // Enumerations that are "too perfect"
  perfectStructures: [
    /^De plus, .+ En outre, .+ Enfin,/gi,
    /^Premièrement, .+ Deuxièmement, .+ Troisièmement,/gi,
  ],

  // Overly regular sentence lengths (detected dynamically)
  uniformLengths: true,
};
/**
 * Vary the syntactic structure of a text to break typical LLM patterns
 * (main entry point).
 *
 * Runs up to four probabilistic passes, in this order: split long sentences,
 * merge short sentences, reorganise predictable enumerations, fix repetitive
 * sentence starts. Each pass is triggered with a probability derived from
 * `intensity`. As soon as the accumulated number of modifications reaches
 * `maxModifications`, the remaining passes are skipped. A single pass can
 * still overshoot the budget; in that case the reported `modifications` is
 * clamped to `maxModifications` (the per-pass `stats` keep the real counts).
 *
 * Any error thrown while processing is logged as a warning and the original
 * text is returned untouched.
 *
 * @param {string} text - Text to vary. Empty, blank or non-string input is returned as is.
 * @param {number} [intensity=0.3] - Variation intensity (0-1).
 * @param {object} [options] - Options.
 * @param {boolean} [options.preserveReadability=true] - Only reported in the debug log by this function.
 * @param {number} [options.maxModifications=3] - Modification budget (see above).
 * @returns {{content: string, modifications: number, stats: {sentencesSplit: number, sentencesMerged: number, structuresReorganized: number, repetitiveStartsFixed: number}}}
 */
function varyStructures(text, intensity = 0.3, options = {}) {
  // Always hand back the same `stats` shape, whatever the exit path
  const createStats = () => ({
    sentencesSplit: 0,
    sentencesMerged: 0,
    structuresReorganized: 0,
    repetitiveStartsFixed: 0
  });

  if (typeof text !== 'string' || text.trim().length === 0) {
    return { content: text, modifications: 0, stats: createStats() };
  }

  const config = {
    preserveReadability: true,
    maxModifications: 3,
    ...options
  };
  logSh(`📝 Variation syntaxique: intensité ${intensity}, préservation: ${config.preserveReadability}`, 'DEBUG');

  // Ordered passes: trigger probability, stats counter to fill, and the transformation itself
  const passes = [
    { statKey: 'sentencesSplit', chance: intensity, run: (input) => splitLongSentences(input, intensity) },
    { statKey: 'sentencesMerged', chance: intensity * 0.7, run: (input) => mergeShorter(input, intensity) },
    { statKey: 'structuresReorganized', chance: intensity * 0.8, run: (input) => reorganizeStructures(input, intensity) },
    { statKey: 'repetitiveStartsFixed', chance: intensity * 0.6, run: (input) => fixRepetitiveStarts(input) }
  ];

  let modifiedText = text;
  let totalModifications = 0;
  const stats = createStats();

  try {
    // 1. Analyse sentence structure (used for the debug trace only)
    const sentences = analyzeSentenceStructure(modifiedText);
    logSh(` 📊 ${sentences.length} phrases analysées`, 'DEBUG');

    // 2-5. Run the passes while the modification budget is not exhausted
    for (const pass of passes) {
      if (totalModifications >= config.maxModifications) {
        logSh(` ⏹️ Budget de modifications atteint (${config.maxModifications}), passes restantes ignorées`, 'DEBUG');
        break;
      }
      if (Math.random() < pass.chance) {
        const result = pass.run(modifiedText);
        modifiedText = result.content;
        totalModifications += result.modifications;
        stats[pass.statKey] = result.modifications;
      }
    }

    // 6. Safety clamp on the reported count when the last pass overshot the budget
    if (totalModifications > config.maxModifications) {
      logSh(` ⚠️ Limitation appliquée: ${totalModifications} → ${config.maxModifications} modifications`, 'DEBUG');
      totalModifications = config.maxModifications;
    }

    logSh(`📝 Syntaxe modifiée: ${totalModifications} changements (${stats.sentencesSplit} splits, ${stats.sentencesMerged} merges)`, 'DEBUG');
  } catch (error) {
    // Best effort: never let a variation failure break the caller
    logSh(`❌ Erreur variation syntaxique: ${error.message}`, 'WARNING');
    return { content: text, modifications: 0, stats: createStats() };
  }

  return {
    content: modifiedText,
    modifications: totalModifications,
    stats
  };
}
/**
 * Split a text into sentences and compute basic structural metrics for each.
 *
 * Sentences are delimited by runs of `.`, `!` or `?`; blank fragments are
 * dropped. Non-string input yields an empty array.
 *
 * @param {string} text - Text to analyse.
 * @returns {Array<{index: number, content: string, length: number, wordCount: number, isLong: boolean, isShort: boolean, hasComplexStructure: boolean}>}
 *   One entry per sentence: trimmed content, its character length and word
 *   count, long (> 120 chars) / short (< 40 chars) flags, and whether it
 *   contains a comma together with a relative pronoun (" qui " or " que ").
 */
function analyzeSentenceStructure(text) {
  if (typeof text !== 'string') {
    return [];
  }

  return text
    .split(/[.!?]+/)
    .filter((fragment) => fragment.trim().length > 0)
    .map((fragment, index) => {
      const content = fragment.trim();
      const hasRelativePronoun = fragment.includes(' qui ') || fragment.includes(' que ');

      return {
        index,
        content,
        length: content.length,
        wordCount: content.split(/\s+/).length,
        isLong: content.length > 120,
        isShort: content.length < 40,
        // Explicit grouping: a comma AND a relative pronoun. The unparenthesised
        // `a && b || c` form flagged every sentence containing " que ".
        hasComplexStructure: fragment.includes(',') && hasRelativePronoun
      };
    });
}
/**
 * Split long sentences at natural cut points.
 *
 * The text is cut on ". "; each fragment longer than 100 characters is
 * rewritten with probability `intensity * 0.6` using the first cut point that
 * applies (relative clause, " et " followed by at least 30 characters,
 * ", car", ", mais"). At most one cut is made per fragment.
 *
 * @param {string} text - Text to process. Empty or non-string input is returned as is.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and number of sentences split.
 */
function splitLongSentences(text, intensity) {
  if (typeof text !== 'string' || text.length === 0) {
    return { content: text, modifications: 0 };
  }

  // Natural cut points, tried in order; built once per call rather than once per sentence
  const cutPoints = [
    { pattern: /, qui (.+)/, replacement: '. Celui-ci $1' },
    { pattern: /, que (.+)/, replacement: '. Cela $1' },
    { pattern: /, dont (.+)/, replacement: '. Celui-ci $1' },
    { pattern: / et (.{30,})/, replacement: '. De plus, $1' },
    { pattern: /, car (.+)/, replacement: '. En effet, $1' },
    { pattern: /, mais (.+)/, replacement: '. Cependant, $1' }
  ];

  let modifications = 0;

  const processedSentences = text.split('. ').map((sentence) => {
    // Only long sentences are candidates; the random draw happens only for those
    if (sentence.length <= 100 || !(Math.random() < intensity * 0.6)) {
      return sentence;
    }

    for (const { pattern, replacement } of cutPoints) {
      // `replace` returns the input unchanged when the pattern does not match
      const candidate = sentence.replace(pattern, replacement);
      if (candidate !== sentence) {
        modifications++;
        logSh(` ✂️ Phrase découpée: ${sentence.length} → ${candidate.length} chars`, 'DEBUG');
        return candidate;
      }
    }

    return sentence;
  });

  return {
    content: processedSentences.join('. '),
    modifications
  };
}
/**
 * Merge short consecutive sentences with a connector.
 *
 * The text is cut on ". ". When a fragment is shorter than 50 characters and
 * the following one is shorter than 70, they are joined with probability
 * `intensity * 0.5` using a randomly chosen connector. Only the leading letter
 * of the second sentence is lowercased, so proper nouns and acronyms inside it
 * keep their case; a sentence whose second character is uppercase (e.g. "SEO")
 * is treated as starting with an acronym and left untouched.
 *
 * @param {string} text - Text to process. Empty or non-string input is returned as is.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and number of merges.
 */
function mergeShorter(text, intensity) {
  if (typeof text !== 'string' || text.length === 0) {
    return { content: text, modifications: 0 };
  }

  // Connectors for a natural-sounding merge
  const connectors = [', de plus,', ', également,', ', aussi,', ' et'];

  // Lowercase the sentence-initial letter only
  const lowerFirstLetter = (sentence) => {
    const second = sentence.charAt(1);
    const startsWithAcronym = second !== second.toLowerCase();
    return startsWithAcronym
      ? sentence
      : sentence.charAt(0).toLowerCase() + sentence.slice(1);
  };

  const sentences = text.split('. ');
  const processedSentences = [];
  let modifications = 0;

  for (let i = 0; i < sentences.length; i++) {
    const current = sentences[i];
    const next = sentences[i + 1];
    const bothShort = Boolean(current) && current.length < 50 && Boolean(next) && next.length < 70;

    if (bothShort && Math.random() < intensity * 0.5) {
      const connector = connectors[Math.floor(Math.random() * connectors.length)];
      const merged = `${current}${connector} ${lowerFirstLetter(next)}`;
      processedSentences.push(merged);
      modifications++;
      logSh(` 🔗 Phrases fusionnées: ${current.length} + ${next.length} → ${merged.length} chars`, 'DEBUG');
      i++; // The next sentence has been consumed by the merge
    } else {
      processedSentences.push(current);
    }
  }

  return {
    content: processedSentences.join('. '),
    modifications
  };
}
/**
 * Rewrite predictable enumeration structures with less formulaic connectors.
 *
 * Each known enumeration pattern found in the text is rewritten (all of its
 * occurrences at once) with probability `intensity`; a rewritten pattern
 * counts as one modification regardless of how many occurrences it had.
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and number of patterns applied.
 */
function reorganizeStructures(text, intensity) {
  // [enumeration to detect, replacement template]
  const rewrites = [
    [
      /Premièrement, (.+?)\. Deuxièmement, (.+?)\. Troisièmement, (.+?)\./gi,
      'D\'abord, $1. Ensuite, $2. Enfin, $3.'
    ],
    [
      /D\'une part, (.+?)\. D\'autre part, (.+?)\./gi,
      'Tout d\'abord, $1. Par ailleurs, $2.'
    ],
    [
      /En premier lieu, (.+?)\. En second lieu, (.+?)\./gi,
      'Dans un premier temps, $1. Puis, $2.'
    ]
  ];

  let result = text;
  let applied = 0;

  for (const [matcher, template] of rewrites) {
    // The random draw only happens when the enumeration is actually present
    if (result.match(matcher) === null) {
      continue;
    }
    if (!(Math.random() < intensity)) {
      continue;
    }

    result = result.replace(matcher, template);
    applied += 1;
    logSh(` 🔄 Structure réorganisée: énumération variée`, 'DEBUG');
  }

  return { content: result, modifications: applied };
}
/**
 * Vary sentence openers that are repeated too often.
 *
 * The text is cut on ". " and the first word of each fragment is counted
 * (case-insensitively). When a first word occurs more than twice and has known
 * alternatives, each such sentence has that word replaced by a randomly picked
 * alternative with probability 0.4. Leading whitespace of the fragment is
 * preserved, and only replacements that actually happened are counted.
 *
 * NOTE(review): only the first word is substituted, so multi-word alternatives
 * can produce awkward French (e.g. "Cette" + noun becomes "Cette option" + noun,
 * "Pour le ..." becomes "Afin de le ..."). Confirm this is acceptable or refine
 * the alternatives table.
 *
 * @param {string} text - Text to process. Empty or non-string input is returned as is.
 * @returns {{content: string, modifications: number}} Rewritten text and number of openers changed.
 */
function fixRepetitiveStarts(text) {
  if (typeof text !== 'string' || text.length === 0) {
    return { content: text, modifications: 0 };
  }

  // Replacement openers, keyed by the lowercased first word.
  // A Map avoids collisions with Object.prototype keys such as "constructor".
  const alternatives = new Map([
    ['il', ['Cet élément', 'Cette solution', 'Ce produit']],
    ['cette', ['Cette option', 'Cette approche', 'Cette méthode']],
    ['pour', ['Afin de', 'Dans le but de', 'En vue de']],
    ['avec', ['Grâce à', 'Au moyen de', 'En utilisant']],
    ['dans', ['Au sein de', 'À travers', 'Parmi']]
  ]);

  const firstWordOf = (sentence) => sentence.trim().split(/\s+/)[0].toLowerCase();

  const sentences = text.split('. ');

  // Count how often each opener is used
  const startCounts = new Map();
  for (const sentence of sentences) {
    const word = firstWordOf(sentence);
    startCounts.set(word, (startCounts.get(word) ?? 0) + 1);
  }

  let modifications = 0;

  const processedSentences = sentences.map((sentence) => {
    const firstWord = firstWordOf(sentence);
    const candidates = alternatives.get(firstWord);

    if (!candidates || startCounts.get(firstWord) <= 2 || !(Math.random() < 0.4)) {
      return sentence;
    }

    const replacement = candidates[Math.floor(Math.random() * candidates.length)];
    // Swap the first whitespace-delimited token, keeping any leading whitespace
    // (fragments after a double space or a newline start with whitespace).
    const newSentence = sentence.replace(/^(\s*)\S+/, (_match, lead) => lead + replacement);
    modifications++;
    logSh(` 🔄 Début varié: "${firstWord}" → "${replacement}"`, 'DEBUG');
    return newSentence;
  });

  return {
    content: processedSentences.join('. '),
    modifications
  };
}
/**
 * Detect overly uniform sentence lengths (a typical LLM pattern).
 *
 * Sentences are delimited by runs of `.`, `!` or `?`. With at least three
 * sentences, the text is considered uniform when the coefficient of variation
 * of the sentence lengths (standard deviation / mean) is below 0.3, an
 * arbitrary threshold.
 *
 * The result always has the same shape. With fewer than three sentences (or
 * non-string input) no dispersion is computed: `uniform` is false and
 * `variance` / `standardDev` are reported as 0.
 *
 * @param {string} text - Text to analyse.
 * @returns {{uniform: boolean, variance: number, avgLength: number, standardDev: number, sentenceCount: number}}
 *   Note that `variance` carries the coefficient of variation (the value
 *   compared against the threshold), not the statistical variance.
 */
function detectUniformLengths(text) {
  const sentences = typeof text === 'string'
    ? text.split(/[.!?]+/).filter((s) => s.trim().length > 0)
    : [];
  const lengths = sentences.map((s) => s.trim().length);
  const sentenceCount = lengths.length;
  const avgLength = sentenceCount === 0
    ? 0
    : lengths.reduce((sum, len) => sum + len, 0) / sentenceCount;

  // Too few sentences to say anything about regularity
  if (sentenceCount < 3) {
    return { uniform: false, variance: 0, avgLength, standardDev: 0, sentenceCount };
  }

  // Population variance of the sentence lengths
  const variance = lengths.reduce((sum, len) => sum + (len - avgLength) ** 2, 0) / sentenceCount;
  const standardDev = Math.sqrt(variance);

  // Uniform when the standard deviation is small relative to the mean;
  // avgLength is > 0 here because blank fragments were filtered out.
  const coefficientVariation = standardDev / avgLength;
  const uniform = coefficientVariation < 0.3; // Arbitrary threshold

  return {
    uniform,
    variance: coefficientVariation,
    avgLength,
    standardDev,
    sentenceCount
  };
}
/**
 * Apply subtle micro-syntactic variations (adverb swaps, a frequent
 * conjugation fix, connector replacement).
 *
 * Each pattern is applied to the whole text with probability
 * `intensity * pattern.probability`; a pattern that changed the text counts as
 * one modification, whatever the number of occurrences replaced.
 *
 * @param {string} text - Text to process. Empty or non-string input is returned as is.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and number of patterns applied.
 */
function addMicroVariations(text, intensity) {
  if (typeof text !== 'string' || text.length === 0) {
    return { content: text, modifications: 0 };
  }

  // Subtle micro-variations
  const microPatterns = [
    { from: /\btrès (.+?)\b/g, to: 'particulièrement $1', probability: 0.3 },
    { from: /\bassez (.+?)\b/g, to: 'plutôt $1', probability: 0.4 },
    { from: /\bbeaucoup de/g, to: 'de nombreux', probability: 0.3 },
    { from: /\bpermets de/g, to: 'permet de', probability: 0.8 }, // Frequent correction
    // Word boundary followed by "en effet" (previously mistyped as "\bien effet", which never matched)
    { from: /\ben effet\b/g, to: 'effectivement', probability: 0.2 }
  ];

  let modified = text;
  let modifications = 0;

  for (const { from, to, probability } of microPatterns) {
    if (!(Math.random() < intensity * probability)) {
      continue;
    }

    const before = modified;
    modified = modified.replace(from, to);
    if (modified !== before) {
      modifications++;
      logSh(` 🔧 Micro-variation: ${from} → ${to}`, 'DEBUG');
    }
  }

  return {
    content: modified,
    modifications
  };
}
// ============= EXPORTS =============
// Public API of this module (CommonJS): the main entry point (varyStructures),
// the individual transformation passes, the analysis helpers, and the
// LLM_SYNTAX_PATTERNS table.
module.exports = {
varyStructures,
splitLongSentences,
mergeShorter,
reorganizeStructures,
fixRepetitiveStarts,
analyzeSentenceStructure,
detectUniformLengths,
addMicroVariations,
LLM_SYNTAX_PATTERNS
};