Added plan.md with complete architecture for format-agnostic content generation: - Support for Markdown, HTML, Plain Text, JSON formats - New FormatExporter module with neutral data structure - Integration strategy with existing ContentAssembly and ArticleStorage - Bonus features: SEO metadata generation, readability scoring, WordPress Gutenberg format - Implementation roadmap with 4 phases (6h total estimated) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
387 lines
14 KiB
JavaScript
387 lines
14 KiB
JavaScript
// ========================================
// FILE: LLMFingerprints.js
// RESPONSIBILITY: Replace words and expressions typical of LLM output
// Identification and replacement of AI "fingerprints"
// ========================================
|
|
|
|
const { logSh } = require('../ErrorReporting');
|
|
|
|
/**
 * Words and expressions typical of LLM output, with suggested replacements.
 *
 * Entries are grouped by suspicion level (`critical`, `high`, `moderate`) and
 * then by category (`adjectives`, `verbs`, `expressions`). Single-word entries
 * use the `word` key and multi-word entries use `phrase`; every entry also
 * carries a list of `alternatives` and a numeric `suspicion` weight.
 *
 * Each term is listed exactly once, at its highest level, so that a single
 * occurrence in a text is never counted twice by code that walks every level.
 */
const LLM_FINGERPRINTS = {
  // ========================================
  // CRITICAL LEVEL - highly suspicious
  // ========================================
  critical: {
    adjectives: [
      { word: 'comprehensive', alternatives: ['complet', 'détaillé', 'approfondi', 'exhaustif'], suspicion: 0.95 },
      { word: 'robust', alternatives: ['solide', 'fiable', 'résistant', 'durable'], suspicion: 0.92 },
      { word: 'seamless', alternatives: ['fluide', 'harmonieux', 'sans accroc', 'naturel'], suspicion: 0.90 },
      { word: 'optimal', alternatives: ['idéal', 'parfait', 'excellent', 'adapté'], suspicion: 0.88 },
      { word: 'cutting-edge', alternatives: ['innovant', 'moderne', 'récent', 'avancé'], suspicion: 0.87 },
      { word: 'state-of-the-art', alternatives: ['dernier cri', 'moderne', 'récent'], suspicion: 0.95 }
    ],

    expressions: [
      { phrase: 'il est important de noter que', alternatives: ['remarquons que', 'signalons que', 'précisons que'], suspicion: 0.85 },
      { phrase: 'dans le paysage actuel', alternatives: ['actuellement', 'de nos jours', 'aujourd\'hui'], suspicion: 0.82 },
      { phrase: 'il convient de souligner', alternatives: ['il faut noter', 'soulignons', 'remarquons'], suspicion: 0.80 },
      { phrase: 'en fin de compte', alternatives: ['finalement', 'au final', 'pour conclure'], suspicion: 0.75 }
    ]
  },

  // ========================================
  // HIGH LEVEL - often suspicious
  // ========================================
  high: {
    adjectives: [
      { word: 'innovative', alternatives: ['novateur', 'créatif', 'original', 'moderne'], suspicion: 0.75 },
      { word: 'efficient', alternatives: ['efficace', 'performant', 'rapide', 'pratique'], suspicion: 0.70 },
      { word: 'versatile', alternatives: ['polyvalent', 'adaptable', 'flexible', 'modulable'], suspicion: 0.68 },
      { word: 'sophisticated', alternatives: ['raffiné', 'élaboré', 'avancé', 'complexe'], suspicion: 0.65 },
      { word: 'compelling', alternatives: ['convaincant', 'captivant', 'intéressant'], suspicion: 0.72 }
    ],

    verbs: [
      { word: 'leverage', alternatives: ['utiliser', 'exploiter', 'tirer parti de', 'employer'], suspicion: 0.80 },
      { word: 'optimize', alternatives: ['améliorer', 'perfectionner', 'ajuster'], suspicion: 0.65 },
      { word: 'streamline', alternatives: ['simplifier', 'rationaliser', 'organiser'], suspicion: 0.75 },
      { word: 'enhance', alternatives: ['améliorer', 'enrichir', 'renforcer'], suspicion: 0.60 }
    ],

    expressions: [
      { phrase: 'par ailleurs', alternatives: ['de plus', 'également', 'aussi', 'en outre'], suspicion: 0.65 },
      { phrase: 'en outre', alternatives: ['de plus', 'également', 'aussi'], suspicion: 0.70 },
      { phrase: 'cela dit', alternatives: ['néanmoins', 'toutefois', 'cependant'], suspicion: 0.60 }
    ]
  },

  // ========================================
  // MODERATE LEVEL - sometimes suspicious
  // ========================================
  moderate: {
    // 'comprehensive' is intentionally absent here: it is already listed under
    // `critical`, and a second entry made one occurrence count twice.
    adjectives: [
      { word: 'significant', alternatives: ['important', 'notable', 'considérable', 'marquant'], suspicion: 0.55 },
      { word: 'essential', alternatives: ['indispensable', 'crucial', 'vital', 'nécessaire'], suspicion: 0.50 },
      { word: 'effective', alternatives: ['efficace', 'performant', 'réussi'], suspicion: 0.45 }
    ],

    expressions: [
      { phrase: 'il est essentiel de', alternatives: ['il faut', 'il importe de', 'il est crucial de'], suspicion: 0.55 },
      { phrase: 'dans cette optique', alternatives: ['dans cette perspective', 'ainsi', 'de ce fait'], suspicion: 0.52 },
      { phrase: 'à cet égard', alternatives: ['sur ce point', 'concernant cela', 'à ce propos'], suspicion: 0.48 }
    ]
  }
};
|
|
|
|
/**
 * Structural patterns typical of LLM-written French text.
 *
 * Every value is an array of global, case-insensitive regular expressions.
 * The `formalStarts` patterns also carry the multiline flag so that `^`
 * matches at the start of every line, not only at the start of the whole
 * text; this mirrors the `gim` flags used by the replacement rules for the
 * same openers in `replaceStructuralPatterns`.
 */
const STRUCTURAL_PATTERNS = {
  // Overly formal sentence openers
  formalStarts: [
    /^Il est important de souligner que/gim,
    /^Il convient de noter que/gim,
    /^Il est essentiel de comprendre que/gim,
    /^Dans ce contexte, il est crucial de/gim,
    /^Il est primordial de/gim
  ],

  // Transitions that chain too neatly
  perfectTransitions: [
    /\. Par ailleurs, (.+?)\. En outre, (.+?)\. De plus,/gi,
    /\. Premièrement, (.+?)\. Deuxièmement, (.+?)\. Troisièmement,/gi
  ],

  // Overly formal conclusions
  formalConclusions: [
    /En conclusion, il apparaît clairement que/gi,
    /Pour conclure, il est évident que/gi,
    /En définitive, nous pouvons affirmer que/gi
  ]
};
|
|
|
|
/**
 * Detects LLM-typical vocabulary and structural patterns in a text.
 *
 * Every entry of LLM_FINGERPRINTS is searched as a whole word/phrase
 * (case-insensitive), then every regex of STRUCTURAL_PATTERNS is applied.
 * The suspicion score is the sum of `suspicion * occurrences` over all hits
 * divided by the number of whitespace-separated words in the text.
 *
 * @param {string} text - Text to analyse.
 * @returns {object} `{ count, patterns, detailedPatterns, suspicionScore, recommendation }`.
 *   `count` is the number of distinct patterns found (not occurrences).
 *   Empty, blank or non-string input yields a zeroed result.
 */
function detectLLMPatterns(text) {
  if (typeof text !== 'string' || text.trim().length === 0) {
    return {
      count: 0,
      patterns: [],
      detailedPatterns: [],
      suspicionScore: 0,
      recommendation: 'minor_cleanup'
    };
  }

  // `\b` only knows ASCII word characters, so a term that starts or ends with
  // an accented letter (e.g. "à cet égard") could never match after a space.
  // Unicode-aware lookarounds give proper whole-word matching, and the term is
  // escaped so it is always treated as literal text.
  const buildTermRegex = (term) => {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, 'giu');
  };

  const detectedPatterns = [];
  let totalSuspicion = 0;

  // Trim first: splitting untrimmed text yields empty leading/trailing tokens
  // that would inflate the word count and dilute the score.
  const wordCount = text.trim().split(/\s+/).length;

  // Vocabulary fingerprints, all levels
  for (const [level, categories] of Object.entries(LLM_FINGERPRINTS)) {
    for (const [category, items] of Object.entries(categories)) {
      for (const item of items) {
        const term = item.word || item.phrase;
        const matches = text.match(buildTermRegex(term));
        if (!matches) continue;

        detectedPatterns.push({
          pattern: term,
          type: category,
          level,
          count: matches.length,
          suspicion: item.suspicion,
          alternatives: item.alternatives
        });
        totalSuspicion += item.suspicion * matches.length;
      }
    }
  }

  // Structural patterns: all reported as level "high" with a fixed 0.80 weight
  for (const patterns of Object.values(STRUCTURAL_PATTERNS)) {
    for (const pattern of patterns) {
      const matches = text.match(pattern);
      if (!matches) continue;

      detectedPatterns.push({
        pattern: pattern.source,
        type: 'structural',
        level: 'high',
        count: matches.length,
        suspicion: 0.80
      });
      totalSuspicion += 0.80 * matches.length;
    }
  }

  const suspicionScore = wordCount > 0 ? totalSuspicion / wordCount : 0;

  logSh(`🔍 Patterns LLM détectés: ${detectedPatterns.length} (score suspicion: ${suspicionScore.toFixed(3)})`, 'DEBUG');

  return {
    count: detectedPatterns.length,
    patterns: detectedPatterns.map((p) => p.pattern),
    detailedPatterns: detectedPatterns,
    suspicionScore,
    recommendation: suspicionScore > 0.05 ? 'replacement' : 'minor_cleanup'
  };
}
|
|
|
|
/**
 * Replaces LLM-typical words and expressions with more natural alternatives.
 *
 * Levels are processed in priority order (critical, high, moderate). For each
 * term found in the text, a replacement happens with probability
 * `suspicion * intensity`; all occurrences of the term are then replaced by
 * one randomly chosen alternative, and that counts as one replacement.
 * Structural patterns are handled afterwards via replaceStructuralPatterns if
 * the replacement budget is not yet used up.
 *
 * @param {string} text - Text to process.
 * @param {object} [options] - `{ intensity = 0.5, preserveContext = true, maxReplacements = 5 }`.
 *   `preserveContext` is merged into the config but is not read by this function.
 * @returns {object} `{ content, replacements, details }`. On empty, blank or
 *   non-string input, or when an error is caught, `content` is the input
 *   unchanged, `replacements` is 0 and `details` is empty.
 */
function replaceLLMFingerprints(text, options = {}) {
  if (typeof text !== 'string' || text.trim().length === 0) {
    return { content: text, replacements: 0, details: [] };
  }

  const config = {
    intensity: 0.5,
    preserveContext: true,
    maxReplacements: 5,
    ...options
  };

  logSh(`🤖 Remplacement fingerprints LLM: intensité ${config.intensity}`, 'DEBUG');

  // `\b` is ASCII-only and fails around accented letters ("à cet égard"), so
  // whole-word matching is done with Unicode-aware lookarounds. The term is
  // escaped so that it is always matched literally.
  const buildTermRegex = (term) => {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, 'giu');
  };

  // Matching is case-insensitive, so a capitalised occurrence (typically at
  // the start of a sentence) must keep its capital after replacement.
  const matchCase = (matched, alternative) => {
    const first = matched.charAt(0);
    const isCapitalised = first !== first.toLowerCase();
    return isCapitalised
      ? alternative.charAt(0).toUpperCase() + alternative.slice(1)
      : alternative;
  };

  let modifiedText = text;
  let totalReplacements = 0;
  const replacementDetails = [];

  try {
    // Detect first: nothing to do when the text carries no fingerprint
    const detection = detectLLMPatterns(modifiedText);

    if (detection.count === 0) {
      logSh(` ✅ Aucun fingerprint LLM détecté`, 'DEBUG');
      return { content: text, replacements: 0, details: [] };
    }

    const priorities = ['critical', 'high', 'moderate'];

    for (const priority of priorities) {
      if (totalReplacements >= config.maxReplacements) break;

      const categoryData = LLM_FINGERPRINTS[priority];
      if (!categoryData) continue;

      for (const [category, items] of Object.entries(categoryData)) {
        for (const item of items) {
          if (totalReplacements >= config.maxReplacements) break;

          const searchTerm = item.word || item.phrase;
          const regex = buildTermRegex(searchTerm);

          if (!modifiedText.match(regex)) continue;

          // Replacement probability grows with both suspicion and intensity
          const replacementProbability = item.suspicion * config.intensity;
          if (Math.random() >= replacementProbability) continue;

          const alternative = item.alternatives[Math.floor(Math.random() * item.alternatives.length)];

          const beforeText = modifiedText;
          // A replacer function also keeps `$` in an alternative from being
          // interpreted as a replacement pattern.
          modifiedText = modifiedText.replace(regex, (matched) => matchCase(matched, alternative));

          if (modifiedText === beforeText) continue;

          totalReplacements++;
          replacementDetails.push({
            original: searchTerm,
            replacement: alternative,
            category,
            level: priority,
            suspicion: item.suspicion
          });

          logSh(` 🔄 Remplacé "${searchTerm}" → "${alternative}" (suspicion: ${item.suspicion})`, 'DEBUG');
        }
      }
    }

    // Structural patterns, only if the budget is not exhausted
    if (totalReplacements < config.maxReplacements) {
      const structuralResult = replaceStructuralPatterns(modifiedText, config.intensity);
      modifiedText = structuralResult.content;
      totalReplacements += structuralResult.replacements;
      replacementDetails.push(...structuralResult.details);
    }

    logSh(`🤖 Fingerprints remplacés: ${totalReplacements} modifications`, 'DEBUG');
  } catch (error) {
    // Best effort: on any failure, hand back the untouched input
    logSh(`❌ Erreur remplacement fingerprints: ${error.message}`, 'WARNING');
    return { content: text, replacements: 0, details: [] };
  }

  return {
    content: modifiedText,
    replacements: totalReplacements,
    details: replacementDetails
  };
}
|
|
|
|
/**
 * Rewrites overly formal sentence openers into more natural ones.
 *
 * Each rule is considered independently and is applied with probability
 * `intensity * 0.7`. A rule that changes the text counts as one replacement,
 * however many lines it rewrote.
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Replacement intensity; when it is not a number
 *   the probability comparison is never true and the text is returned as is.
 * @returns {object} `{ content, replacements, details }`. Non-string input is
 *   returned untouched with zero replacements instead of throwing.
 */
function replaceStructuralPatterns(text, intensity) {
  if (typeof text !== 'string') {
    return { content: text, replacements: 0, details: [] };
  }

  // Formal openers -> more natural versions. The `m` flag lets `^` match at
  // the start of every line; `$1` carries over the rest of the line.
  const formalStartReplacements = [
    {
      from: /^Il est important de souligner que (.+)/gim,
      to: 'Notons que $1',
      name: 'début formel'
    },
    {
      from: /^Il convient de noter que (.+)/gim,
      to: 'Précisons que $1',
      name: 'formulation convient'
    },
    {
      from: /^Dans ce contexte, il est crucial de (.+)/gim,
      to: 'Il faut $1',
      name: 'contexte crucial'
    }
  ];

  let modified = text;
  let replacements = 0;
  const details = [];

  for (const rule of formalStartReplacements) {
    // One random draw per rule, whether or not the rule would match
    if (Math.random() >= intensity * 0.7) continue;

    const rewritten = modified.replace(rule.from, rule.to);
    if (rewritten === modified) continue;

    modified = rewritten;
    replacements++;
    details.push({
      original: rule.name,
      replacement: 'version naturelle',
      category: 'structural',
      level: 'high',
      suspicion: 0.80
    });

    logSh(` 🏗️ Pattern structurel remplacé: ${rule.name}`, 'DEBUG');
  }

  return {
    content: modified,
    replacements,
    details
  };
}
|
|
|
|
/**
 * Computes the density of LLM fingerprints in a text.
 *
 * Density is the number of distinct patterns reported by detectLLMPatterns
 * divided by the number of whitespace-separated words. Risk is `high` above
 * 0.08, `medium` above 0.04, otherwise `low`.
 *
 * @param {string} text - Text to analyse.
 * @returns {object} `{ fingerprintCount, wordCount, density, riskLevel, suspicionScore, recommendation }`.
 *   Empty, blank or non-string input yields zero counts and a `low` risk
 *   instead of throwing or reporting a phantom word.
 */
function analyzeFingerprintDensity(text) {
  const trimmed = typeof text === 'string' ? text.trim() : '';

  if (trimmed.length === 0) {
    return {
      fingerprintCount: 0,
      wordCount: 0,
      density: 0,
      riskLevel: 'low',
      suspicionScore: 0,
      recommendation: 'minimal_cleanup'
    };
  }

  const detection = detectLLMPatterns(text);

  // Count words on the trimmed text: leading/trailing whitespace would
  // otherwise add empty tokens and understate the density.
  const wordCount = trimmed.split(/\s+/).length;
  const density = detection.count / wordCount;

  let riskLevel = 'low';
  if (density > 0.08) {
    riskLevel = 'high';
  } else if (density > 0.04) {
    riskLevel = 'medium';
  }

  const recommendations = {
    high: 'immediate_replacement',
    medium: 'selective_replacement',
    low: 'minimal_cleanup'
  };

  return {
    fingerprintCount: detection.count,
    wordCount,
    density,
    riskLevel,
    suspicionScore: detection.suspicionScore,
    recommendation: recommendations[riskLevel]
  };
}
|
|
|
|
/**
 * Suggests alternatives for a fingerprint word, based on a personality name
 * and on the surrounding context.
 *
 * A known personality ('marc', 'sophie', 'kevin', case-insensitive) with an
 * adaptation for the word wins and yields a single suggestion. Otherwise the
 * suggestion depends on whether the context contains a keyword tied to the
 * word. Unknown words yield `['standard']`.
 *
 * @param {string} word - Word to replace; matched case-insensitively.
 * @param {string} [context] - Surrounding text; anything but a string is
 *   treated as empty.
 * @param {string} [personality] - Personality name; anything but a string is
 *   ignored.
 * @returns {string[]} Suggested alternatives.
 */
function generateContextualAlternatives(word, context, personality) {
  const term = typeof word === 'string' ? word.toLowerCase() : '';
  const contextText = typeof context === 'string' ? context : '';

  // Own-property lookup only: a plain `table[key]` would hand back inherited
  // members such as `constructor` or `toString` for words of that name.
  const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

  // Personality-specific adaptation, when a personality name is supplied
  if (typeof personality === 'string') {
    const personalityAdaptations = {
      'marc': { 'optimal': 'efficace', 'robust': 'solide', 'comprehensive': 'complet' },
      'sophie': { 'optimal': 'parfait', 'robust': 'résistant', 'comprehensive': 'détaillé' },
      'kevin': { 'optimal': 'nickel', 'robust': 'costaud', 'comprehensive': 'complet' }
    };

    const personalityKey = personality.toLowerCase();
    if (hasOwn(personalityAdaptations, personalityKey)) {
      const adaptations = personalityAdaptations[personalityKey];
      if (hasOwn(adaptations, term)) {
        return [adaptations[term]];
      }
    }
  }

  // Basic contextual suggestions
  const contextualMappings = {
    'optimal': contextText.includes('solution') ? ['idéale', 'parfaite'] : ['excellent', 'adapté'],
    'robust': contextText.includes('système') ? ['fiable', 'stable'] : ['solide', 'résistant'],
    'comprehensive': contextText.includes('analyse') ? ['approfondie', 'détaillée'] : ['complète', 'globale']
  };

  return hasOwn(contextualMappings, term) ? contextualMappings[term] : ['standard'];
}
|
|
|
|
// ============= EXPORTS =============
|
|
module.exports = {
|
|
detectLLMPatterns,
|
|
replaceLLMFingerprints,
|
|
replaceStructuralPatterns,
|
|
analyzeFingerprintDensity,
|
|
generateContextualAlternatives,
|
|
LLM_FINGERPRINTS,
|
|
STRUCTURAL_PATTERNS
|
|
}; |