Added plan.md with complete architecture for format-agnostic content generation: - Support for Markdown, HTML, Plain Text, JSON formats - New FormatExporter module with neutral data structure - Integration strategy with existing ContentAssembly and ArticleStorage - Bonus features: SEO metadata generation, readability scoring, WordPress Gutenberg format - Implementation roadmap with 4 phases (6h total estimated) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
380 lines
12 KiB
JavaScript
380 lines
12 KiB
JavaScript
// ========================================
|
|
// FICHIER: SyntaxVariations.js
|
|
// RESPONSABILITÉ: Variations syntaxiques pour casser patterns LLM
|
|
// Techniques: découpage, fusion, restructuration phrases
|
|
// ========================================
|
|
|
|
const { logSh } = require('../ErrorReporting');
|
|
|
|
/**
 * Syntactic patterns typical of LLM-generated French text, to be avoided.
 *
 * The regular expressions deliberately carry no `g` flag. Every pattern is
 * anchored with `^` and has no `m` flag, so it can match at most once, at the
 * very start of the tested string. A `g` flag would add nothing there, but it
 * would make `.test()` / `.exec()` stateful through `lastIndex`: a shared
 * pattern would then miss every second matching string.
 */
const LLM_SYNTAX_PATTERNS = {
  // Overly predictable sentence openers
  repetitiveStarts: [
    /^Il est important de/i,
    /^Il convient de/i,
    /^Il faut noter que/i,
    /^Dans ce contexte/i,
    /^Par ailleurs/i,
  ],

  // Overly "perfect" enumerations
  perfectStructures: [
    /^De plus, .+ En outre, .+ Enfin,/i,
    /^Premièrement, .+ Deuxièmement, .+ Troisièmement,/i,
  ],

  // Overly regular sentence lengths: no static pattern, detected dynamically
  uniformLengths: true,
};
|
|
|
|
/**
 * Main entry point: applies randomized syntactic variations to a text.
 *
 * Four transformations are attempted in a fixed order, each one gated by a
 * random draw scaled by `intensity`: splitting long sentences, merging short
 * sentences, reorganizing predictable enumerations, and varying repetitive
 * sentence openers.
 *
 * @param {string} text - Text to vary. Empty, blank or non-string input is
 *   returned untouched.
 * @param {number} [intensity=0.3] - Variation intensity (0-1); scales the
 *   probability of each transformation.
 * @param {object} [options] - Overrides for the defaults
 *   `{ preserveReadability: true, maxModifications: 3 }`.
 *   `preserveReadability` is only logged by this function.
 * @returns {{content: *, modifications: number, stats: object}} The resulting
 *   text, the number of modifications (capped at `maxModifications`) and the
 *   per-transformation counters. `stats` always has the same four numeric
 *   keys, including on the early-return and error paths.
 */
function varyStructures(text, intensity = 0.3, options = {}) {
  // Fresh counter object for every return path, so the result shape is stable.
  const createStats = () => ({
    sentencesSplit: 0,
    sentencesMerged: 0,
    structuresReorganized: 0,
    repetitiveStartsFixed: 0,
  });

  if (typeof text !== 'string' || text.trim().length === 0) {
    return { content: text, modifications: 0, stats: createStats() };
  }

  const config = {
    preserveReadability: true,
    maxModifications: 3,
    ...options,
  };

  logSh(`📝 Variation syntaxique: intensité ${intensity}, préservation: ${config.preserveReadability}`, 'DEBUG');

  // Transformations in execution order, with the probability of each running.
  const steps = [
    { stat: 'sentencesSplit', probability: intensity, apply: (input) => splitLongSentences(input, intensity) },
    { stat: 'sentencesMerged', probability: intensity * 0.7, apply: (input) => mergeShorter(input, intensity) },
    { stat: 'structuresReorganized', probability: intensity * 0.8, apply: (input) => reorganizeStructures(input, intensity) },
    { stat: 'repetitiveStartsFixed', probability: intensity * 0.6, apply: (input) => fixRepetitiveStarts(input) },
  ];

  let modifiedText = text;
  let totalModifications = 0;
  const stats = createStats();

  try {
    // The structural analysis is only used for the debug trace.
    const sentences = analyzeSentenceStructure(modifiedText);
    logSh(` 📊 ${sentences.length} phrases analysées`, 'DEBUG');

    for (const step of steps) {
      // Safety limit: once the budget is spent, do not alter the text further.
      if (totalModifications >= config.maxModifications) {
        break;
      }

      if (Math.random() < step.probability) {
        const result = step.apply(modifiedText);
        modifiedText = result.content;
        totalModifications += result.modifications;
        stats[step.stat] = result.modifications;
      }
    }

    // A single step may still overshoot the budget; the reported total is capped.
    if (totalModifications > config.maxModifications) {
      logSh(` ⚠️ Limitation appliquée: ${totalModifications} → ${config.maxModifications} modifications`, 'DEBUG');
      totalModifications = config.maxModifications;
    }

    logSh(`📝 Syntaxe modifiée: ${totalModifications} changements (${stats.sentencesSplit} splits, ${stats.sentencesMerged} merges)`, 'DEBUG');
  } catch (error) {
    // Best effort: on any failure, fall back to the untouched input.
    logSh(`❌ Erreur variation syntaxique: ${error.message}`, 'WARNING');
    return { content: text, modifications: 0, stats: createStats() };
  }

  return {
    content: modifiedText,
    modifications: totalModifications,
    stats,
  };
}
|
|
|
|
/**
 * Splits a text into sentences and describes each one.
 *
 * Sentences are delimited by runs of `.`, `!` or `?`; fragments that are
 * empty after trimming are dropped.
 *
 * @param {string} text - Text to analyze. Non-string input yields `[]`.
 * @returns {Array<{index: number, content: string, length: number,
 *   wordCount: number, isLong: boolean, isShort: boolean,
 *   hasComplexStructure: boolean}>} One descriptor per sentence, where
 *   `length` is the trimmed character count, `isLong` means more than 120
 *   characters and `isShort` fewer than 40.
 */
function analyzeSentenceStructure(text) {
  if (typeof text !== 'string') {
    return [];
  }

  return text
    .split(/[.!?]+/)
    .filter((fragment) => fragment.trim().length > 0)
    .map((fragment, index) => {
      const content = fragment.trim();
      const hasRelativePronoun = fragment.includes(' qui ') || fragment.includes(' que ');

      return {
        index,
        content,
        length: content.length,
        wordCount: content.split(/\s+/).length,
        isLong: content.length > 120,
        isShort: content.length < 40,
        // A sentence is "complex" only when it has a comma AND a relative
        // pronoun; the grouping is explicit so that " que " alone is not enough.
        hasComplexStructure: fragment.includes(',') && hasRelativePronoun,
      };
    });
}
|
|
|
|
/**
 * Breaks long sentences in two at a natural cut point.
 *
 * The text is divided on the literal separator ". ". A piece longer than 120
 * characters is rewritten, with probability `intensity * 0.4`, using the
 * first cut rule that matches it (relative clauses, " et ", ", car ",
 * ", mais "). The pieces are then joined back with ". ".
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and the
 *   number of sentences that were split.
 */
function splitLongSentences(text, intensity) {
  // [pattern, replacement] pairs, tried in this order; the first hit wins.
  const cutRules = [
    [/, qui (.+)/, '. Celui-ci $1'],
    [/, que (.+)/, '. Cela $1'],
    [/, dont (.+)/, '. Celui-ci $1'],
    [/ et (.{30,})/, '. De plus, $1'],
    [/, car (.+)/, '. En effet, $1'],
    [/, mais (.+)/, '. Cependant, $1'],
  ];

  let splitCount = 0;
  const rewritten = [];

  for (const phrase of text.split('. ')) {
    let outcome = phrase;

    // The random draw only happens for sentences that are long enough.
    const isCandidate = phrase.length > 120 && Math.random() < (intensity * 0.4);

    if (isCandidate) {
      for (const [regex, substitute] of cutRules) {
        if (!regex.test(phrase)) {
          continue;
        }

        const candidate = phrase.replace(regex, substitute);
        if (candidate === phrase) {
          continue;
        }

        splitCount += 1;
        logSh(` ✂️ Phrase découpée: ${phrase.length} → ${candidate.length} chars`, 'DEBUG');
        outcome = candidate;
        break;
      }
    }

    rewritten.push(outcome);
  }

  return {
    content: rewritten.join('. '),
    modifications: splitCount,
  };
}
|
|
|
|
/**
 * Merges a short sentence with the sentence that follows it.
 *
 * The text is divided on the literal separator ". ". A sentence shorter than
 * 40 characters whose successor is shorter than 60 characters is merged with
 * it, with probability `intensity * 0.3`, through a randomly chosen
 * connector. The pieces are then joined back with ". ".
 *
 * Only the initial letter of the absorbed sentence is lowercased, so acronyms
 * and proper nouns further in the sentence keep their capitals. A sentence
 * that opens with an all-caps token (e.g. "SEO") is left as is.
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and the
 *   number of merges performed.
 */
function mergeShorter(text, intensity) {
  // Connectors used to glue the two sentences naturally.
  const connectors = [', de plus,', ', également,', ', aussi,', ' et'];

  // Lowercases only the sentence-initial letter, unless the sentence starts
  // with an acronym (second character is also uppercase).
  const decapitalize = (sentence) => {
    const [first = '', second = ''] = sentence;
    const startsWithAcronym = second !== second.toLowerCase();
    if (startsWithAcronym) {
      return sentence;
    }
    return first.toLowerCase() + sentence.slice(first.length);
  };

  const sentences = text.split('. ');
  const processedSentences = [];
  let modifications = 0;

  for (let i = 0; i < sentences.length; i++) {
    const current = sentences[i];
    const next = sentences[i + 1];

    const isMergeable = Boolean(current) && current.length < 40 && Boolean(next) && next.length < 60;

    if (isMergeable && Math.random() < (intensity * 0.3)) {
      const connector = connectors[Math.floor(Math.random() * connectors.length)];
      const merged = `${current}${connector} ${decapitalize(next)}`;

      processedSentences.push(merged);
      modifications++;

      logSh(` 🔗 Phrases fusionnées: ${current.length} + ${next.length} → ${merged.length} chars`, 'DEBUG');

      i++; // Skip the next sentence: it has just been absorbed.
    } else {
      processedSentences.push(current);
    }
  }

  return {
    content: processedSentences.join('. '),
    modifications,
  };
}
|
|
|
|
/**
 * Rewrites predictable enumerations with less formulaic connectors.
 *
 * Each known enumeration pattern found in the text is replaced, with
 * probability `intensity`, by an equivalent wording. One modification is
 * counted per pattern applied, however many occurrences it replaces.
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and the
 *   number of patterns applied.
 */
function reorganizeStructures(text, intensity) {
  // [detector, rewording] pairs for the enumerations to vary.
  const rewordings = [
    [
      /Premièrement, (.+?)\. Deuxièmement, (.+?)\. Troisièmement, (.+?)\./gi,
      'D\'abord, $1. Ensuite, $2. Enfin, $3.',
    ],
    [
      /D\'une part, (.+?)\. D\'autre part, (.+?)\./gi,
      'Tout d\'abord, $1. Par ailleurs, $2.',
    ],
    [
      /En premier lieu, (.+?)\. En second lieu, (.+?)\./gi,
      'Dans un premier temps, $1. Puis, $2.',
    ],
  ];

  let output = text;
  let applied = 0;

  for (const [detector, rewording] of rewordings) {
    // The random draw only happens when the enumeration is actually present.
    const isPresent = output.search(detector) !== -1;
    if (!isPresent || !(Math.random() < intensity)) {
      continue;
    }

    output = output.replace(detector, rewording);
    applied += 1;
    logSh(` 🔄 Structure réorganisée: énumération variée`, 'DEBUG');
  }

  return { content: output, modifications: applied };
}
|
|
|
|
/**
 * Varies sentence openers that are repeated too often.
 *
 * The text is divided on the literal separator ". ". The first word of every
 * sentence is counted (case-insensitively). When a word opens more than two
 * sentences and has known alternatives, each such sentence gets its opener
 * replaced with probability 0.4. The pieces are then joined back with ". ".
 *
 * Leading whitespace of a sentence is preserved and the first token after it
 * is the one replaced, so every counted modification is an actual change.
 *
 * @param {string} text - Text to process.
 * @returns {{content: string, modifications: number}} Rewritten text and the
 *   number of openers replaced.
 */
function fixRepetitiveStarts(text) {
  // Replacement openers, keyed by the lowercased first word. A Map is used so
  // that words such as "constructor" never hit inherited object properties.
  const alternatives = new Map([
    ['il', ['Cet élément', 'Cette solution', 'Ce produit']],
    ['cette', ['Cette option', 'Cette approche', 'Cette méthode']],
    ['pour', ['Afin de', 'Dans le but de', 'En vue de']],
    ['avec', ['Grâce à', 'Au moyen de', 'En utilisant']],
    ['dans', ['Au sein de', 'À travers', 'Parmi']],
  ]);

  const firstWordOf = (sentence) => sentence.trim().split(/\s+/)[0].toLowerCase();

  const sentences = text.split('. ');

  // Count how many sentences start with each word.
  const startCounts = new Map();
  for (const sentence of sentences) {
    const word = firstWordOf(sentence);
    startCounts.set(word, (startCounts.get(word) ?? 0) + 1);
  }

  let modifications = 0;

  const processedSentences = sentences.map((sentence) => {
    const firstWord = firstWordOf(sentence);
    const candidates = alternatives.get(firstWord);

    // The random draw only happens for repeated openers that have alternatives.
    if (!candidates || startCounts.get(firstWord) <= 2 || Math.random() >= 0.4) {
      return sentence;
    }

    const replacement = candidates[Math.floor(Math.random() * candidates.length)];
    // Keep any leading whitespace and swap the first token after it.
    const newSentence = sentence.replace(/^(\s*)\S+/, (_match, lead) => lead + replacement);

    modifications++;
    logSh(` 🔄 Début varié: "${firstWord}" → "${replacement}"`, 'DEBUG');
    return newSentence;
  });

  return {
    content: processedSentences.join('. '),
    modifications,
  };
}
|
|
|
|
/**
 * Detects sentences of suspiciously uniform length (an LLM pattern).
 *
 * Sentences are delimited by runs of `.`, `!` or `?`; their trimmed lengths
 * are compared through the coefficient of variation (standard deviation
 * divided by mean). The text is reported as uniform when that coefficient is
 * below 0.3 (arbitrary threshold).
 *
 * With fewer than three sentences no dispersion is computed: `uniform` is
 * `false` and `variance` / `standardDev` are reported as 0.
 *
 * @param {string} text - Text to analyze. Non-string input is treated as a
 *   text without sentences.
 * @returns {{uniform: boolean, variance: number, avgLength: number,
 *   standardDev: number, sentenceCount: number}} Note that `variance` holds
 *   the coefficient of variation, not the statistical variance. All five
 *   keys are present on every return path.
 */
function detectUniformLengths(text) {
  const lengths = typeof text === 'string'
    ? text
      .split(/[.!?]+/)
      .map((sentence) => sentence.trim().length)
      .filter((length) => length > 0)
    : [];

  const sentenceCount = lengths.length;
  const avgLength = sentenceCount === 0
    ? 0
    : lengths.reduce((sum, length) => sum + length, 0) / sentenceCount;

  // Too few sentences to say anything meaningful about regularity.
  if (sentenceCount < 3) {
    return { uniform: false, variance: 0, avgLength, standardDev: 0, sentenceCount };
  }

  const squaredDeviations = lengths.reduce((sum, length) => sum + (length - avgLength) ** 2, 0);
  const standardDev = Math.sqrt(squaredDeviations / sentenceCount);

  // avgLength is > 0 here: only non-empty sentences were kept.
  const coefficientVariation = standardDev / avgLength;

  return {
    uniform: coefficientVariation < 0.3, // Arbitrary threshold
    variance: coefficientVariation,
    avgLength,
    standardDev,
    sentenceCount,
  };
}
|
|
|
|
/**
 * Applies subtle word-level variations to a text.
 *
 * Each micro-pattern is applied to the whole text with probability
 * `intensity * pattern.probability`. One modification is counted per pattern
 * that actually changed the text.
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Variation intensity (0-1).
 * @returns {{content: string, modifications: number}} Rewritten text and the
 *   number of patterns that changed it.
 */
function addMicroVariations(text, intensity) {
  const microPatterns = [
    { from: /\btrès (.+?)\b/g, to: 'particulièrement $1', probability: 0.3 },
    { from: /\bassez (.+?)\b/g, to: 'plutôt $1', probability: 0.4 },
    { from: /\bbeaucoup de/g, to: 'de nombreux', probability: 0.3 },
    { from: /\bpermets de/g, to: 'permet de', probability: 0.8 }, // Frequent correction
    // Word boundary followed by "en effet". The previous `/\bien effet\b/`
    // parsed as `\b` + "ien effet" and therefore could never match.
    { from: /\ben effet\b/g, to: 'effectivement', probability: 0.2 },
  ];

  let modified = text;
  let modifications = 0;

  for (const pattern of microPatterns) {
    if (Math.random() < (intensity * pattern.probability)) {
      const before = modified;
      modified = modified.replace(pattern.from, pattern.to);

      // Count the pattern only when it really changed something.
      if (modified !== before) {
        modifications++;
        logSh(` 🔧 Micro-variation: ${pattern.from} → ${pattern.to}`, 'DEBUG');
      }
    }
  }

  return {
    content: modified,
    modifications,
  };
}
|
|
|
|
// ============= EXPORTS =============
|
|
module.exports = {
|
|
varyStructures,
|
|
splitLongSentences,
|
|
mergeShorter,
|
|
reorganizeStructures,
|
|
fixRepetitiveStarts,
|
|
analyzeSentenceStructure,
|
|
detectUniformLengths,
|
|
addMicroVariations,
|
|
LLM_SYNTAX_PATTERNS
|
|
}; |