seogeneratorserver/lib/post-processing/SentenceVariation.js
StillHammer dbf1a3de8c Add technical plan for multi-format export system
Added plan.md with complete architecture for format-agnostic content generation:
- Support for Markdown, HTML, Plain Text, JSON formats
- New FormatExporter module with neutral data structure
- Integration strategy with existing ContentAssembly and ArticleStorage
- Bonus features: SEO metadata generation, readability scoring, WordPress Gutenberg format
- Implementation roadmap with 4 phases (6h total estimated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-18 16:14:29 +08:00

336 lines
10 KiB
JavaScript

// ========================================
// PATTERN BREAKING - TECHNIQUE 1: SENTENCE VARIATION
// Responsabilité: Varier les longueurs de phrases pour casser l'uniformité
// Anti-détection: Éviter patterns syntaxiques réguliers des LLMs
// ========================================
const { logSh } = require('../ErrorReporting');
const { tracer } = require('../trace');
/**
* MAIN ENTRY POINT - VARIATION LONGUEUR PHRASES
* @param {Object} input - { content: {}, config: {}, context: {} }
* @returns {Object} - { content: {}, stats: {}, debug: {} }
*/
async function applySentenceVariation(input) {
return await tracer.run('SentenceVariation.applySentenceVariation()', async () => {
const { content, config = {}, context = {} } = input;
const {
intensity = 0.3, // Probabilité de modification (30%)
splitThreshold = 100, // Chars pour split
mergeThreshold = 30, // Chars pour merge
preserveQuestions = true, // Préserver questions FAQ
preserveTitles = true // Préserver titres
} = config;
await tracer.annotate({
technique: 'sentence_variation',
intensity,
elementsCount: Object.keys(content).length
});
const startTime = Date.now();
logSh(`📐 TECHNIQUE 1/3: Variation longueur phrases (intensité: ${intensity})`, 'INFO');
logSh(` 📊 ${Object.keys(content).length} éléments à analyser`, 'DEBUG');
try {
const results = {};
let totalProcessed = 0;
let totalModified = 0;
let modificationsDetails = [];
// Traiter chaque élément de contenu
for (const [tag, text] of Object.entries(content)) {
totalProcessed++;
// Skip certains éléments selon config
if (shouldSkipElement(tag, text, { preserveQuestions, preserveTitles })) {
results[tag] = text;
logSh(` ⏭️ [${tag}]: Préservé (${getSkipReason(tag, text)})`, 'DEBUG');
continue;
}
// Appliquer variation si éligible
const variationResult = varyTextStructure(text, {
intensity,
splitThreshold,
mergeThreshold,
tag
});
results[tag] = variationResult.text;
if (variationResult.modified) {
totalModified++;
modificationsDetails.push({
tag,
modifications: variationResult.modifications,
originalLength: text.length,
newLength: variationResult.text.length
});
logSh(` ✏️ [${tag}]: ${variationResult.modifications.length} modifications`, 'DEBUG');
} else {
logSh(` ➡️ [${tag}]: Aucune modification`, 'DEBUG');
}
}
const duration = Date.now() - startTime;
const stats = {
processed: totalProcessed,
modified: totalModified,
modificationRate: Math.round((totalModified / totalProcessed) * 100),
duration,
technique: 'sentence_variation'
};
logSh(`✅ VARIATION PHRASES: ${stats.modified}/${stats.processed} éléments modifiés (${stats.modificationRate}%) en ${duration}ms`, 'INFO');
await tracer.event('Sentence variation terminée', stats);
return {
content: results,
stats,
debug: {
technique: 'sentence_variation',
config: { intensity, splitThreshold, mergeThreshold },
modifications: modificationsDetails
}
};
} catch (error) {
const duration = Date.now() - startTime;
logSh(`❌ VARIATION PHRASES échouée après ${duration}ms: ${error.message}`, 'ERROR');
throw new Error(`SentenceVariation failed: ${error.message}`);
}
}, input);
}
/**
* Appliquer variation structure à un texte
*/
function varyTextStructure(text, config) {
const { intensity, splitThreshold, mergeThreshold, tag } = config;
if (text.length < 50) {
return { text, modified: false, modifications: [] };
}
// Séparer en phrases
const sentences = splitIntoSentences(text);
if (sentences.length < 2) {
return { text, modified: false, modifications: [] };
}
let modifiedSentences = [...sentences];
const modifications = [];
// TECHNIQUE 1: SPLIT des phrases longues
for (let i = 0; i < modifiedSentences.length; i++) {
const sentence = modifiedSentences[i];
if (sentence.length > splitThreshold && Math.random() < intensity) {
const splitResult = splitLongSentence(sentence);
if (splitResult.success) {
modifiedSentences.splice(i, 1, splitResult.part1, splitResult.part2);
modifications.push({
type: 'split',
original: sentence.substring(0, 50) + '...',
result: `${splitResult.part1.substring(0, 25)}... | ${splitResult.part2.substring(0, 25)}...`
});
i++; // Skip la phrase suivante (qui est notre part2)
}
}
}
// TECHNIQUE 2: MERGE des phrases courtes
for (let i = 0; i < modifiedSentences.length - 1; i++) {
const current = modifiedSentences[i];
const next = modifiedSentences[i + 1];
if (current.length < mergeThreshold && next.length < mergeThreshold && Math.random() < intensity) {
const merged = mergeSentences(current, next);
if (merged.success) {
modifiedSentences.splice(i, 2, merged.result);
modifications.push({
type: 'merge',
original: `${current.substring(0, 20)}... + ${next.substring(0, 20)}...`,
result: merged.result.substring(0, 50) + '...'
});
}
}
}
const finalText = modifiedSentences.join(' ').trim();
return {
text: finalText,
modified: modifications.length > 0,
modifications
};
}
/**
* Diviser texte en phrases
*/
function splitIntoSentences(text) {
// Regex plus sophistiquée pour gérer les abréviations
const sentences = text.split(/(?<![A-Z][a-z]\.)\s*[.!?]+\s+/)
.map(s => s.trim())
.filter(s => s.length > 5);
return sentences;
}
/**
* Diviser une phrase longue en deux
*/
function splitLongSentence(sentence) {
// Points de rupture naturels
const breakPoints = [
', et ',
', mais ',
', car ',
', donc ',
', ainsi ',
', alors ',
', tandis que ',
', bien que '
];
// Chercher le meilleur point de rupture proche du milieu
const idealBreak = sentence.length / 2;
let bestBreak = null;
let bestDistance = Infinity;
for (const breakPoint of breakPoints) {
const index = sentence.indexOf(breakPoint, idealBreak - 50);
if (index > 0 && index < sentence.length - 20) {
const distance = Math.abs(index - idealBreak);
if (distance < bestDistance) {
bestDistance = distance;
bestBreak = { index, breakPoint };
}
}
}
if (bestBreak) {
const part1 = sentence.substring(0, bestBreak.index + 1).trim();
const part2 = sentence.substring(bestBreak.index + bestBreak.breakPoint.length).trim();
// Assurer que part2 commence par une majuscule
const capitalizedPart2 = part2.charAt(0).toUpperCase() + part2.slice(1);
return {
success: true,
part1,
part2: capitalizedPart2
};
}
return { success: false };
}
/**
* Fusionner deux phrases courtes
*/
function mergeSentences(sentence1, sentence2) {
// Connecteurs pour fusion naturelle
const connectors = [
'et',
'puis',
'aussi',
'également',
'de plus'
];
// Choisir connecteur aléatoire
const connector = connectors[Math.floor(Math.random() * connectors.length)];
// Nettoyer les phrases
let cleaned1 = sentence1.replace(/[.!?]+$/, '').trim();
let cleaned2 = sentence2.trim();
// Mettre sentence2 en minuscule sauf si nom propre
if (!/^[A-Z][a-z]*\s+[A-Z]/.test(cleaned2)) {
cleaned2 = cleaned2.charAt(0).toLowerCase() + cleaned2.slice(1);
}
const merged = `${cleaned1}, ${connector} ${cleaned2}`;
return {
success: merged.length < 200, // Éviter phrases trop longues
result: merged
};
}
/**
* Déterminer si un élément doit être skippé
*/
function shouldSkipElement(tag, text, config) {
// Skip titres si demandé
if (config.preserveTitles && (tag.includes('Titre') || tag.includes('H1') || tag.includes('H2'))) {
return true;
}
// Skip questions FAQ si demandé
if (config.preserveQuestions && (tag.includes('Faq_q') || text.includes('?'))) {
return true;
}
// Skip textes très courts
if (text.length < 50) {
return true;
}
return false;
}
/**
* Obtenir raison du skip pour debug
*/
function getSkipReason(tag, text) {
if (tag.includes('Titre') || tag.includes('H1') || tag.includes('H2')) return 'titre';
if (tag.includes('Faq_q') || text.includes('?')) return 'question';
if (text.length < 50) return 'trop court';
return 'autre';
}
/**
* Analyser les patterns de phrases d'un texte
*/
function analyzeSentencePatterns(text) {
const sentences = splitIntoSentences(text);
if (sentences.length < 2) {
return { needsVariation: false, patterns: [] };
}
const lengths = sentences.map(s => s.length);
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
// Calculer uniformité (variance faible = uniformité élevée)
const variance = lengths.reduce((acc, len) => acc + Math.pow(len - avgLength, 2), 0) / lengths.length;
const uniformity = 1 / (1 + Math.sqrt(variance) / avgLength); // 0-1, 1 = très uniforme
return {
needsVariation: uniformity > 0.7, // Seuil d'uniformité problématique
patterns: {
avgLength: Math.round(avgLength),
uniformity: Math.round(uniformity * 100),
sentenceCount: sentences.length,
variance: Math.round(variance)
}
};
}
module.exports = {
applySentenceVariation, // ← MAIN ENTRY POINT
varyTextStructure,
splitIntoSentences,
splitLongSentence,
mergeSentences,
analyzeSentencePatterns
};