// seogeneratorserver/lib/pattern-breaking/LLMFingerprints.js

// ========================================
// FICHIER: LLMFingerprints.js
// RESPONSABILITÉ: Remplacement mots et expressions typiques LLM
// Identification et remplacement des "fingerprints" IA
// ========================================

const { logSh } = require('../ErrorReporting');

/**
 * Words and expressions typical of LLM output, with suggested replacements.
 *
 * Entries are grouped by suspicion level (`critical`, `high`, `moderate`) and
 * then by category (`adjectives`, `verbs`, `expressions`). Each entry carries:
 *   - `word` or `phrase`: the term searched for in the text,
 *   - `alternatives`: candidate replacements,
 *   - `suspicion`: weight summed per occurrence by the detector and multiplied
 *     by the intensity to obtain the replacement probability.
 *
 * Invariants kept by this table: a term appears in one level only (otherwise
 * every occurrence is counted twice by the detector), and no alternative is
 * itself a listed fingerprint (otherwise a replacement re-introduces one).
 */
const LLM_FINGERPRINTS = {

  // ========================================
  // CRITICAL LEVEL - Highly suspicious
  // ========================================
  critical: {
    adjectives: [
      { word: 'comprehensive', alternatives: ['complet', 'détaillé', 'approfondi', 'exhaustif'], suspicion: 0.95 },
      { word: 'robust', alternatives: ['solide', 'fiable', 'résistant', 'durable'], suspicion: 0.92 },
      { word: 'seamless', alternatives: ['fluide', 'harmonieux', 'sans accroc', 'naturel'], suspicion: 0.90 },
      { word: 'optimal', alternatives: ['idéal', 'parfait', 'excellent', 'adapté'], suspicion: 0.88 },
      { word: 'cutting-edge', alternatives: ['innovant', 'moderne', 'récent', 'avancé'], suspicion: 0.87 },
      { word: 'state-of-the-art', alternatives: ['dernier cri', 'moderne', 'récent'], suspicion: 0.95 }
    ],

    expressions: [
      { phrase: 'il est important de noter que', alternatives: ['remarquons que', 'signalons que', 'précisons que'], suspicion: 0.85 },
      { phrase: 'dans le paysage actuel', alternatives: ['actuellement', 'de nos jours', 'aujourd\'hui'], suspicion: 0.82 },
      { phrase: 'il convient de souligner', alternatives: ['il faut noter', 'soulignons', 'remarquons'], suspicion: 0.80 },
      { phrase: 'en fin de compte', alternatives: ['finalement', 'au final', 'pour conclure'], suspicion: 0.75 }
    ]
  },

  // ========================================
  // HIGH LEVEL - Often suspicious
  // ========================================
  high: {
    adjectives: [
      { word: 'innovative', alternatives: ['novateur', 'créatif', 'original', 'moderne'], suspicion: 0.75 },
      { word: 'efficient', alternatives: ['efficace', 'performant', 'rapide', 'pratique'], suspicion: 0.70 },
      { word: 'versatile', alternatives: ['polyvalent', 'adaptable', 'flexible', 'modulable'], suspicion: 0.68 },
      { word: 'sophisticated', alternatives: ['raffiné', 'élaboré', 'avancé', 'complexe'], suspicion: 0.65 },
      { word: 'compelling', alternatives: ['convaincant', 'captivant', 'intéressant'], suspicion: 0.72 }
    ],

    verbs: [
      { word: 'leverage', alternatives: ['utiliser', 'exploiter', 'tirer parti de', 'employer'], suspicion: 0.80 },
      { word: 'optimize', alternatives: ['améliorer', 'perfectionner', 'ajuster'], suspicion: 0.65 },
      { word: 'streamline', alternatives: ['simplifier', 'rationaliser', 'organiser'], suspicion: 0.75 },
      { word: 'enhance', alternatives: ['améliorer', 'enrichir', 'renforcer'], suspicion: 0.60 }
    ],

    expressions: [
      // 'en outre' is deliberately not offered here: it is a fingerprint itself (next entry).
      { phrase: 'par ailleurs', alternatives: ['de plus', 'également', 'aussi'], suspicion: 0.65 },
      { phrase: 'en outre', alternatives: ['de plus', 'également', 'aussi'], suspicion: 0.70 },
      { phrase: 'cela dit', alternatives: ['néanmoins', 'toutefois', 'cependant'], suspicion: 0.60 }
    ]
  },

  // ========================================
  // MODERATE LEVEL - Sometimes suspicious
  // ========================================
  moderate: {
    adjectives: [
      // 'comprehensive' lives in the critical level only, so it is not counted twice.
      { word: 'significant', alternatives: ['important', 'notable', 'considérable', 'marquant'], suspicion: 0.55 },
      { word: 'essential', alternatives: ['indispensable', 'crucial', 'vital', 'nécessaire'], suspicion: 0.50 },
      { word: 'effective', alternatives: ['efficace', 'performant', 'réussi'], suspicion: 0.45 }
    ],

    expressions: [
      { phrase: 'il est essentiel de', alternatives: ['il faut', 'il importe de', 'il est crucial de'], suspicion: 0.55 },
      { phrase: 'dans cette optique', alternatives: ['dans cette perspective', 'ainsi', 'de ce fait'], suspicion: 0.52 },
      { phrase: 'à cet égard', alternatives: ['sur ce point', 'concernant cela', 'à ce propos'], suspicion: 0.48 }
    ]
  }
};

/**
 * Structural LLM patterns, expressed as regular expressions and grouped by kind.
 *
 * All regexes are global and case-insensitive so that `String.prototype.match`
 * returns every occurrence. The `formalStarts` group is additionally multiline:
 * `^` must anchor at the start of every line, not only at the very first
 * character of the text, which is also how the structural replacement step
 * anchors the same openings.
 */
const STRUCTURAL_PATTERNS = {
  // Overly formal sentence openings
  formalStarts: [
    /^Il est important de souligner que/gim,
    /^Il convient de noter que/gim,
    /^Il est essentiel de comprendre que/gim,
    /^Dans ce contexte, il est crucial de/gim,
    /^Il est primordial de/gim
  ],

  // Transitions that are too perfect (three chained connectors)
  perfectTransitions: [
    /\. Par ailleurs, (.+?)\. En outre, (.+?)\. De plus,/gi,
    /\. Premièrement, (.+?)\. Deuxièmement, (.+?)\. Troisièmement,/gi
  ],

  // Overly formal conclusions
  formalConclusions: [
    /En conclusion, il apparaît clairement que/gi,
    /Pour conclure, il est évident que/gi,
    /En définitive, nous pouvons affirmer que/gi
  ]
};

/**
 * Detects LLM fingerprints in a text.
 *
 * Every entry of LLM_FINGERPRINTS is searched case-insensitively as a whole
 * term, then every regex of STRUCTURAL_PATTERNS is applied. Each hit adds
 * `suspicion * occurrences` to a running total (structural hits weigh 0.80),
 * and the total is divided by the number of words of the text.
 *
 * @param {string} text - Text to analyse.
 * @returns {{count: number, patterns: string[], detailedPatterns: object[],
 *            suspicionScore: number, recommendation: string}}
 *   `count` is the number of distinct patterns found (not of occurrences),
 *   `patterns` their labels, `detailedPatterns` the full records, and
 *   `recommendation` is 'replacement' when the score exceeds 0.05, otherwise
 *   'minor_cleanup'. Empty, blank or non-string input yields an all-zero result.
 */
function detectLLMPatterns(text) {
  if (typeof text !== 'string' || text.trim().length === 0) {
    return {
      count: 0,
      patterns: [],
      detailedPatterns: [],
      suspicionScore: 0,
      recommendation: 'minor_cleanup'
    };
  }

  // `\b` only knows ASCII word characters: a term starting with an accented
  // letter ("à cet égard") would never match after a space, and a term glued
  // to an accented letter would match wrongly. Unicode-aware lookarounds give
  // real whole-term matching. The term is escaped so that it is always taken
  // literally (only syntax characters are escaped, as required by the `u` flag).
  const WORD_CHAR = '[\\p{L}\\p{M}\\p{N}_]';
  const buildTermRegex = (term) => {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`(?<!${WORD_CHAR})${escaped}(?!${WORD_CHAR})`, 'giu');
  };

  const detectedPatterns = [];
  let totalSuspicion = 0;
  // Trim first so that surrounding whitespace does not create phantom words.
  const wordCount = text.trim().split(/\s+/).length;

  // Lexical fingerprints, every level and every category
  for (const [level, categories] of Object.entries(LLM_FINGERPRINTS)) {
    for (const [category, items] of Object.entries(categories)) {
      for (const item of items) {
        const term = item.word || item.phrase;
        const matches = text.match(buildTermRegex(term));
        if (!matches) continue;

        detectedPatterns.push({
          pattern: term,
          type: category,
          level,
          count: matches.length,
          suspicion: item.suspicion,
          alternatives: item.alternatives
        });

        totalSuspicion += item.suspicion * matches.length;
      }
    }
  }

  // Structural patterns
  for (const patterns of Object.values(STRUCTURAL_PATTERNS)) {
    for (const pattern of patterns) {
      const matches = text.match(pattern);
      if (!matches) continue;

      detectedPatterns.push({
        pattern: pattern.source,
        type: 'structural',
        level: 'high',
        count: matches.length,
        suspicion: 0.80
      });

      totalSuspicion += 0.80 * matches.length;
    }
  }

  const suspicionScore = wordCount > 0 ? totalSuspicion / wordCount : 0;

  logSh(`🔍 Patterns LLM détectés: ${detectedPatterns.length} (score suspicion: ${suspicionScore.toFixed(3)})`, 'DEBUG');

  return {
    count: detectedPatterns.length,
    patterns: detectedPatterns.map(p => p.pattern),
    detailedPatterns: detectedPatterns,
    suspicionScore,
    recommendation: suspicionScore > 0.05 ? 'replacement' : 'minor_cleanup'
  };
}

/**
 * Replaces LLM fingerprints found in a text by more natural alternatives.
 *
 * The text is first run through detectLLMPatterns; when nothing is detected it
 * is returned untouched. Otherwise the entries of LLM_FINGERPRINTS are visited
 * by priority (critical, high, moderate). An entry present in the text is
 * replaced with probability `suspicion * intensity`, using a randomly chosen
 * alternative for all of its occurrences; this counts as one replacement.
 * Structural patterns are handled last, only if the budget is not exhausted
 * (their own count is added afterwards and is not capped here).
 *
 * @param {string} text - Text to process.
 * @param {object} [options]
 * @param {number} [options.intensity=0.5] - Multiplier applied to each entry's suspicion.
 * @param {boolean} [options.preserveContext=true] - Accepted for compatibility; not read by this function.
 * @param {number} [options.maxReplacements=5] - Budget of lexical replacements.
 * @returns {{content: string, replacements: number, details: object[]}}
 *   On any internal error the original text is returned with zero replacements.
 */
function replaceLLMFingerprints(text, options = {}) {
  if (typeof text !== 'string' || text.trim().length === 0) {
    return { content: text, replacements: 0, details: [] };
  }

  const config = {
    intensity: 0.5,
    preserveContext: true,
    maxReplacements: 5,
    ...options
  };

  logSh(`🤖 Remplacement fingerprints LLM: intensité ${config.intensity}`, 'DEBUG');

  // `\b` is ASCII-only and fails around accented letters; use Unicode-aware
  // lookarounds and escape the term so it is always matched literally.
  const WORD_CHAR = '[\\p{L}\\p{M}\\p{N}_]';
  const buildTermRegex = (term) => {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`(?<!${WORD_CHAR})${escaped}(?!${WORD_CHAR})`, 'giu');
  };

  // Matching is case-insensitive, so a sentence-initial "Par ailleurs" must
  // become "De plus", not "de plus".
  const matchCase = (matched, alternative) => {
    const first = matched.charAt(0);
    const isCapitalized = first !== first.toLowerCase();
    return isCapitalized
      ? alternative.charAt(0).toUpperCase() + alternative.slice(1)
      : alternative;
  };

  let modifiedText = text;
  let totalReplacements = 0;
  const replacementDetails = [];

  try {
    // Detect first: nothing to do on a clean text
    const detection = detectLLMPatterns(modifiedText);

    if (detection.count === 0) {
      logSh(`   ✅ Aucun fingerprint LLM détecté`, 'DEBUG');
      return { content: text, replacements: 0, details: [] };
    }

    const priorities = ['critical', 'high', 'moderate'];

    scan:
    for (const priority of priorities) {
      const categoryData = LLM_FINGERPRINTS[priority];
      if (!categoryData) continue;

      for (const [category, items] of Object.entries(categoryData)) {
        for (const item of items) {
          if (totalReplacements >= config.maxReplacements) break scan;

          // Without alternatives there is nothing sensible to substitute.
          if (!Array.isArray(item.alternatives) || item.alternatives.length === 0) continue;

          const searchTerm = item.word || item.phrase;
          const regex = buildTermRegex(searchTerm);
          if (modifiedText.search(regex) === -1) continue;

          // Replacement probability driven by suspicion and intensity
          const replacementProbability = item.suspicion * config.intensity;
          if (!(Math.random() < replacementProbability)) continue;

          const alternative = item.alternatives[Math.floor(Math.random() * item.alternatives.length)];

          const beforeText = modifiedText;
          // A replacer function also keeps any `$` in the alternative literal.
          modifiedText = modifiedText.replace(regex, (matched) => matchCase(matched, alternative));

          if (modifiedText !== beforeText) {
            totalReplacements++;
            replacementDetails.push({
              original: searchTerm,
              replacement: alternative,
              category,
              level: priority,
              suspicion: item.suspicion
            });

            logSh(`     🔄 Remplacé "${searchTerm}" → "${alternative}" (suspicion: ${item.suspicion})`, 'DEBUG');
          }
        }
      }
    }

    // Structural patterns, only while some budget is left
    if (totalReplacements < config.maxReplacements) {
      const structuralResult = replaceStructuralPatterns(modifiedText, config.intensity);
      modifiedText = structuralResult.content;
      totalReplacements += structuralResult.replacements;
      replacementDetails.push(...structuralResult.details);
    }

    logSh(`🤖 Fingerprints remplacés: ${totalReplacements} modifications`, 'DEBUG');

  } catch (error) {
    // Best effort: never let a replacement failure break the caller.
    logSh(`❌ Erreur remplacement fingerprints: ${error.message}`, 'WARNING');
    return { content: text, replacements: 0, details: [] };
  }

  return {
    content: modifiedText,
    replacements: totalReplacements,
    details: replacementDetails
  };
}

/**
 * Rewrites overly formal line openings into more natural wording.
 *
 * Each rule is attempted independently: one random draw per rule, and the rule
 * is applied only when the draw is below `intensity * 0.7`. A rule counts as a
 * replacement only if it actually changed the text.
 *
 * @param {string} text - Text to rewrite.
 * @param {number} intensity - Scales the probability of applying each rule.
 * @returns {{content: string, replacements: number, details: object[]}}
 */
function replaceStructuralPatterns(text, intensity) {
  // [label, pattern, substitute] — formal openings and their lighter form
  const rules = [
    ['début formel', /^Il est important de souligner que (.+)/gim, 'Notons que $1'],
    ['formulation convient', /^Il convient de noter que (.+)/gim, 'Précisons que $1'],
    ['contexte crucial', /^Dans ce contexte, il est crucial de (.+)/gim, 'Il faut $1']
  ];

  const threshold = intensity * 0.7;
  const details = [];
  let content = text;

  for (const [name, pattern, substitute] of rules) {
    // Written as a negated `<` so that a NaN threshold skips the rule.
    const roll = Math.random();
    if (!(roll < threshold)) continue;

    const rewritten = content.replace(pattern, substitute);
    if (rewritten === content) continue;

    content = rewritten;
    details.push({
      original: name,
      replacement: 'version naturelle',
      category: 'structural',
      level: 'high',
      suspicion: 0.80
    });

    logSh(`     🏗️ Pattern structurel remplacé: ${name}`, 'DEBUG');
  }

  return {
    content,
    replacements: details.length,
    details
  };
}

/**
 * Computes the density of LLM fingerprints in a text.
 *
 * Density is the number of distinct detected patterns (as reported by
 * detectLLMPatterns) divided by the number of words. Above 0.08 the risk is
 * 'high', above 0.04 'medium', otherwise 'low'.
 *
 * @param {string} text - Text to analyse.
 * @returns {{fingerprintCount: number, wordCount: number, density: number,
 *            riskLevel: string, suspicionScore: number, recommendation: string}}
 *   Empty, blank or non-string input yields a zeroed, low-risk result instead
 *   of throwing.
 */
function analyzeFingerprintDensity(text) {
  const trimmed = typeof text === 'string' ? text.trim() : '';

  // Nothing to analyse: avoid calling split on a non-string and avoid
  // reporting one phantom word for an empty text.
  if (trimmed.length === 0) {
    return {
      fingerprintCount: 0,
      wordCount: 0,
      density: 0,
      riskLevel: 'low',
      suspicionScore: 0,
      recommendation: 'minimal_cleanup'
    };
  }

  const detection = detectLLMPatterns(text);
  // Count words on the trimmed text so surrounding whitespace adds none.
  const wordCount = trimmed.split(/\s+/).length;

  const density = detection.count / wordCount;
  const riskLevel = density > 0.08 ? 'high' : density > 0.04 ? 'medium' : 'low';

  return {
    fingerprintCount: detection.count,
    wordCount,
    density,
    riskLevel,
    suspicionScore: detection.suspicionScore,
    recommendation: riskLevel === 'high' ? 'immediate_replacement' :
                    riskLevel === 'medium' ? 'selective_replacement' : 'minimal_cleanup'
  };
}

/**
 * Suggests alternatives for a word, taking personality and context into account.
 *
 * When `personality` names a known profile (case-insensitive) that has an
 * adaptation for `word`, that single adaptation is returned. Otherwise the
 * suggestion depends on whether `context` contains a keyword associated with
 * the word. Unknown words yield ['standard'].
 *
 * @param {string} word - Word to replace (matched exactly, case-sensitive).
 * @param {string|string[]} [context] - Surrounding text; anything exposing
 *   `includes` is accepted, a missing context is treated as empty.
 * @param {string} [personality] - Optional profile name; non-string values are ignored.
 * @returns {string[]} Candidate alternatives.
 */
function generateContextualAlternatives(word, context, personality) {
  // Own-property lookups only: inherited keys such as 'constructor' or
  // 'toString' must not be mistaken for table entries.
  const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

  // Personality-specific adaptation, when provided
  if (typeof personality === 'string' && personality.length > 0) {
    const personalityAdaptations = {
      'marc': { 'optimal': 'efficace', 'robust': 'solide', 'comprehensive': 'complet' },
      'sophie': { 'optimal': 'parfait', 'robust': 'résistant', 'comprehensive': 'détaillé' },
      'kevin': { 'optimal': 'nickel', 'robust': 'costaud', 'comprehensive': 'complet' }
    };

    const profile = personality.toLowerCase();
    if (hasOwn(personalityAdaptations, profile) && hasOwn(personalityAdaptations[profile], word)) {
      return [personalityAdaptations[profile][word]];
    }
  }

  // A missing context simply contains no keyword.
  const contextHas = (keyword) =>
    context != null && typeof context.includes === 'function' && context.includes(keyword);

  // Basic contextual suggestions
  const contextualMappings = {
    'optimal': contextHas('solution') ? ['idéale', 'parfaite'] : ['excellent', 'adapté'],
    'robust': contextHas('système') ? ['fiable', 'stable'] : ['solide', 'résistant'],
    'comprehensive': contextHas('analyse') ? ['approfondie', 'détaillée'] : ['complète', 'globale']
  };

  return hasOwn(contextualMappings, word) ? contextualMappings[word] : ['standard'];
}

// ============= EXPORTS =============
// Public surface of the module: the detection, replacement and analysis
// functions, plus the two lookup tables (LLM_FINGERPRINTS, STRUCTURAL_PATTERNS).
module.exports = {
  detectLLMPatterns,
  replaceLLMFingerprints,
  replaceStructuralPatterns,
  analyzeFingerprintDensity,
  generateContextualAlternatives,
  LLM_FINGERPRINTS,
  STRUCTURAL_PATTERNS
};