// seo-generator-server/lib/pattern-breaking/SyntaxVariations.js

// ========================================
// FICHIER: SyntaxVariations.js
// RESPONSABILITÉ: Variations syntaxiques pour casser patterns LLM
// Techniques: découpage, fusion, restructuration phrases
// ========================================

const { logSh } = require('../ErrorReporting');

/**
 * Syntactic patterns typical of LLM-generated French text that should be avoided.
 *
 * The regular expressions intentionally carry no `g` flag: every pattern is
 * anchored with `^` (no `m` flag), so it can match at most once, and a shared
 * global regex would keep a `lastIndex` between `.test()` / `.exec()` calls and
 * report alternating true/false results for the same input.
 */
const LLM_SYNTAX_PATTERNS = {
  // Overly predictable sentence openers
  repetitiveStarts: [
    /^Il est important de/i,
    /^Il convient de/i,
    /^Il faut noter que/i,
    /^Dans ce contexte/i,
    /^Par ailleurs/i
  ],

  // Enumerations that are "too perfect"
  perfectStructures: [
    /^De plus, .+ En outre, .+ Enfin,/i,
    /^Premièrement, .+ Deuxièmement, .+ Troisièmement,/i
  ],

  // Overly regular sentence lengths: no static pattern, detected dynamically
  // (presumably by detectUniformLengths in this file).
  uniformLengths: true
};

/**
 * Main entry point: applies syntactic variations to break typical LLM patterns.
 *
 * Four stages run in a fixed order (split long sentences, merge short ones,
 * reorganize enumerations, vary repetitive openers). Each stage is gated by its
 * own random draw scaled by `intensity`, so the output is not deterministic.
 *
 * @param {string} text - Text to vary. Non-string or blank input is returned untouched.
 * @param {number} intensity - Variation intensity (0-1). 0 disables every stage.
 * @param {object} options - Options { preserveReadability, maxModifications }.
 *   `maxModifications` (default 3) is a budget: once the running total reaches
 *   it, the remaining stages are skipped. A single stage can still overshoot,
 *   in which case the reported `modifications` is clamped to the budget.
 *   `preserveReadability` (default true) is only reported in the debug log here.
 * @returns {object} - { content, modifications, stats }. `stats` always holds the
 *   four per-stage counters; on internal error the original text is returned
 *   with all counters at 0.
 */
function varyStructures(text, intensity = 0.3, options = {}) {
  // Fresh zeroed counters, so that every return path exposes the same shape.
  const createStats = () => ({
    sentencesSplit: 0,
    sentencesMerged: 0,
    structuresReorganized: 0,
    repetitiveStartsFixed: 0
  });

  // Guard before touching string methods: nothing to vary.
  if (typeof text !== 'string' || text.trim().length === 0) {
    return { content: text, modifications: 0, stats: createStats() };
  }

  const config = {
    preserveReadability: true,
    maxModifications: 3,
    ...options
  };

  logSh(`📝 Variation syntaxique: intensité ${intensity}, préservation: ${config.preserveReadability}`, 'DEBUG');

  // Ordered pipeline; `probability` is the chance that the stage runs at all.
  const stages = [
    { statKey: 'sentencesSplit', probability: intensity, run: (input) => splitLongSentences(input, intensity) },
    { statKey: 'sentencesMerged', probability: intensity * 0.7, run: (input) => mergeShorter(input, intensity) },
    { statKey: 'structuresReorganized', probability: intensity * 0.8, run: (input) => reorganizeStructures(input, intensity) },
    { statKey: 'repetitiveStartsFixed', probability: intensity * 0.6, run: (input) => fixRepetitiveStarts(input) }
  ];

  let modifiedText = text;
  let totalModifications = 0;
  const stats = createStats();

  try {
    // The analysis is only used for the debug trace below.
    const sentences = analyzeSentenceStructure(modifiedText);
    logSh(`   📊 ${sentences.length} phrases analysées`, 'DEBUG');

    for (const stage of stages) {
      // Budget exhausted: do not alter the text any further.
      if (totalModifications >= config.maxModifications) {
        break;
      }

      // Positive comparison on purpose: a NaN probability never runs the stage.
      if (Math.random() < stage.probability) {
        const result = stage.run(modifiedText);
        modifiedText = result.content;
        totalModifications += result.modifications;
        stats[stage.statKey] = result.modifications;
      }
    }

    // Safety clamp on the reported count (a single stage may overshoot the budget).
    if (totalModifications > config.maxModifications) {
      logSh(`   ⚠️ Limitation appliquée: ${totalModifications} → ${config.maxModifications} modifications`, 'DEBUG');
      totalModifications = config.maxModifications;
    }

    logSh(`📝 Syntaxe modifiée: ${totalModifications} changements (${stats.sentencesSplit} splits, ${stats.sentencesMerged} merges)`, 'DEBUG');
  } catch (error) {
    // Best effort: fall back to the untouched text rather than failing the caller.
    logSh(`❌ Erreur variation syntaxique: ${error.message}`, 'WARNING');
    return { content: text, modifications: 0, stats: createStats() };
  }

  return {
    content: modifiedText,
    modifications: totalModifications,
    stats
  };
}

/**
 * Splits a text into sentences and describes each one.
 *
 * Sentences are delimited by runs of `.`, `!` or `?`; blank fragments are dropped.
 *
 * @param {string} text - Text to analyze. Non-string input yields an empty list.
 * @returns {Array<object>} One entry per sentence:
 *   { index, content, length, wordCount, isLong, isShort, hasComplexStructure }.
 *   `isLong` means more than 120 characters, `isShort` fewer than 40, and
 *   `hasComplexStructure` means the sentence has a comma AND a relative
 *   pronoun (" qui " or " que ").
 */
function analyzeSentenceStructure(text) {
  if (typeof text !== 'string') {
    return [];
  }

  return text
    .split(/[.!?]+/)
    .filter((fragment) => fragment.trim().length > 0)
    .map((fragment, index) => {
      const content = fragment.trim();
      const hasRelativePronoun = fragment.includes(' qui ') || fragment.includes(' que ');

      return {
        index,
        content,
        length: content.length,
        wordCount: content.split(/\s+/).length,
        isLong: content.length > 120,
        isShort: content.length < 40,
        // Explicit grouping: the unparenthesized `a && b || c` form marked any
        // comma-less sentence containing " que " as complex.
        hasComplexStructure: fragment.includes(',') && hasRelativePronoun
      };
    });
}

/**
 * Breaks long sentences in two at a natural cut point.
 *
 * The text is cut on '. '; every fragment longer than 100 characters is a
 * candidate and is processed with probability `intensity * 0.6`. The first
 * rule (in list order) that changes the fragment is applied, once.
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Variation intensity; scales the split probability.
 * @returns {object} - { content, modifications }
 */
function splitLongSentences(text, intensity) {
  // [matcher, substitute] pairs, tried in this order
  const splitRules = [
    [/, qui (.+)/, '. Celui-ci $1'],
    [/, que (.+)/, '. Cela $1'],
    [/, dont (.+)/, '. Celui-ci $1'],
    [/ et (.{30,})/, '. De plus, $1'],
    [/, car (.+)/, '. En effet, $1'],
    [/, mais (.+)/, '. Cependant, $1']
  ];

  const rebuilt = [];
  let splitCount = 0;

  for (const sentence of text.split('. ')) {
    let outcome = sentence;

    // The random draw only happens for fragments that are long enough
    if (sentence.length > 100 && Math.random() < (intensity * 0.6)) {
      for (const [matcher, substitute] of splitRules) {
        const candidate = sentence.replace(matcher, substitute);

        // An unchanged string means the rule did not apply: try the next one
        if (candidate !== sentence) {
          splitCount += 1;
          logSh(`     ✂️ Phrase découpée: ${sentence.length} → ${candidate.length} chars`, 'DEBUG');
          outcome = candidate;
          break;
        }
      }
    }

    rebuilt.push(outcome);
  }

  return { content: rebuilt.join('. '), modifications: splitCount };
}

/**
 * Merges a short sentence with the one that follows it.
 *
 * The text is cut on '. '. A fragment shorter than 50 characters followed by a
 * fragment shorter than 70 characters is merged with probability
 * `intensity * 0.5`, using a randomly chosen connector. A merged pair is
 * consumed as a whole, so a sentence is never merged twice.
 *
 * Only the initial letter of the second sentence is lowered; the rest of it
 * (proper nouns, acronyms) is kept as written. A sentence starting with two
 * uppercase letters is treated as starting with an acronym and left intact.
 *
 * @param {string} text - Text to process. Non-string input is returned untouched.
 * @param {number} intensity - Variation intensity; scales the merge probability.
 * @returns {object} - { content, modifications }
 */
function mergeShorter(text, intensity) {
  if (typeof text !== 'string') {
    return { content: text, modifications: 0 };
  }

  // Connectors used to join the two sentences naturally
  const connectors = [', de plus,', ', également,', ', aussi,', ' et'];

  // Lowers the sentence-initial capital without touching anything else.
  const lowerInitial = (sentence) => {
    if (/^\p{Lu}{2}/u.test(sentence)) {
      return sentence; // leading acronym, e.g. "SEO ..."
    }
    return sentence.charAt(0).toLowerCase() + sentence.slice(1);
  };

  const sentences = text.split('. ');
  const processedSentences = [];
  let modifications = 0;

  for (let i = 0; i < sentences.length; i++) {
    const current = sentences[i];
    const next = sentences[i + 1];
    const isMergeable = Boolean(current) && current.length < 50 && Boolean(next) && next.length < 70;

    // The random draw only happens for eligible pairs
    if (isMergeable && Math.random() < (intensity * 0.5)) {
      const connector = connectors[Math.floor(Math.random() * connectors.length)];
      const merged = `${current}${connector} ${lowerInitial(next)}`;

      processedSentences.push(merged);
      modifications++;

      logSh(`     🔗 Phrases fusionnées: ${current.length} + ${next.length} → ${merged.length} chars`, 'DEBUG');

      i++; // Skip the next sentence: it is now part of the merged one
    } else {
      processedSentences.push(current);
    }
  }

  return {
    content: processedSentences.join('. '),
    modifications
  };
}

/**
 * Rewrites predictable enumerations with less formulaic markers.
 *
 * Each known enumeration is looked up in turn; when present, it is rewritten
 * with probability `intensity`. One modification is counted per enumeration
 * kind, whatever the number of occurrences replaced.
 *
 * @param {string} text - Text to process.
 * @param {number} intensity - Variation intensity, used as rewrite probability.
 * @returns {object} - { content, modifications }
 */
function reorganizeStructures(text, intensity) {
  // [detector, template] pairs, applied in this order
  const rewrites = [
    [
      /Premièrement, (.+?)\. Deuxièmement, (.+?)\. Troisièmement, (.+?)\./gi,
      'D\'abord, $1. Ensuite, $2. Enfin, $3.'
    ],
    [
      /D\'une part, (.+?)\. D\'autre part, (.+?)\./gi,
      'Tout d\'abord, $1. Par ailleurs, $2.'
    ],
    [
      /En premier lieu, (.+?)\. En second lieu, (.+?)\./gi,
      'Dans un premier temps, $1. Puis, $2.'
    ]
  ];

  let current = text;
  let changeCount = 0;

  for (const [detector, template] of rewrites) {
    // No random draw when the enumeration is absent
    const isPresent = Boolean(current.match(detector));
    if (!isPresent) {
      continue;
    }

    if (Math.random() < intensity) {
      current = current.replace(detector, template);
      changeCount += 1;
      logSh(`     🔄 Structure réorganisée: énumération variée`, 'DEBUG');
    }
  }

  return { content: current, modifications: changeCount };
}

/**
 * Varies sentence openers that are repeated too often.
 *
 * The text is cut on '. '. When the same first word (case-insensitive) opens
 * more than two fragments and an alternative exists for it, each such fragment
 * has its first word replaced with probability 0.4 by a random alternative.
 *
 * Leading whitespace of a fragment (e.g. a double space after a period) is
 * preserved, and a modification is only counted when the text really changed.
 *
 * NOTE(review): only the first word is swapped, so some alternatives can give
 * awkward French ("Cette solution" -> "Cette option solution",
 * "Avec le" -> "Grâce à le"). The table is kept as is; confirm with the owner.
 *
 * @param {string} text - Text to process. Non-string input is returned untouched.
 * @returns {object} - { content, modifications }
 */
function fixRepetitiveStarts(text) {
  if (typeof text !== 'string') {
    return { content: text, modifications: 0 };
  }

  // Alternatives per lowercase opener
  const alternatives = new Map([
    ['il', ['Cet élément', 'Cette solution', 'Ce produit']],
    ['cette', ['Cette option', 'Cette approche', 'Cette méthode']],
    ['pour', ['Afin de', 'Dans le but de', 'En vue de']],
    ['avec', ['Grâce à', 'Au moyen de', 'En utilisant']],
    ['dans', ['Au sein de', 'À travers', 'Parmi']]
  ]);

  const firstWordOf = (sentence) => sentence.trim().split(/\s+/)[0].toLowerCase();

  const sentences = text.split('. ');

  // Count how often each opener is used. A Map avoids collisions with
  // Object.prototype keys such as "constructor".
  const startCounts = new Map();
  for (const sentence of sentences) {
    const word = firstWordOf(sentence);
    startCounts.set(word, (startCounts.get(word) ?? 0) + 1);
  }

  let modifications = 0;

  const processedSentences = sentences.map((sentence) => {
    const firstWord = firstWordOf(sentence);
    const candidates = alternatives.get(firstWord);

    // The random draw only happens for repeated openers that have alternatives
    if (!firstWord || !candidates || startCounts.get(firstWord) <= 2 || Math.random() >= 0.4) {
      return sentence;
    }

    const replacement = candidates[Math.floor(Math.random() * candidates.length)];

    // Keep any leading whitespace; a replacer function avoids `$` interpretation.
    const newSentence = sentence.replace(/^(\s*)\w+/, (_match, leading) => leading + replacement);
    if (newSentence === sentence) {
      return sentence;
    }

    modifications++;
    logSh(`     🔄 Début varié: "${firstWord}" → "${replacement}"`, 'DEBUG');
    return newSentence;
  });

  return {
    content: processedSentences.join('. '),
    modifications
  };
}

/**
 * Detects overly uniform sentence lengths (a typical LLM pattern).
 *
 * Sentences are delimited by runs of `.`, `!` or `?`; blank fragments are
 * ignored and lengths are measured on trimmed fragments. Lengths are deemed
 * uniform when the coefficient of variation (standard deviation / mean) is
 * below 0.3 (arbitrary threshold).
 *
 * @param {string} text - Text to analyze. Non-string input is treated as empty.
 * @returns {object} - { uniform, variance, avgLength, standardDev, sentenceCount }.
 *   Despite its name, `variance` holds the coefficient of variation. With fewer
 *   than three sentences the dispersion is not evaluated: `uniform` is false
 *   and `variance` / `standardDev` are reported as 0.
 */
function detectUniformLengths(text) {
  const lengths = typeof text === 'string'
    ? text
      .split(/[.!?]+/)
      .map((fragment) => fragment.trim().length)
      .filter((len) => len > 0)
    : [];

  const sentenceCount = lengths.length;
  const avgLength = sentenceCount > 0
    ? lengths.reduce((sum, len) => sum + len, 0) / sentenceCount
    : 0;

  // Too few sentences to judge: same result shape as the nominal case.
  if (sentenceCount < 3) {
    return { uniform: false, variance: 0, avgLength, standardDev: 0, sentenceCount };
  }

  // Population variance of the sentence lengths
  const variance = lengths.reduce((sum, len) => sum + (len - avgLength) ** 2, 0) / sentenceCount;
  const standardDev = Math.sqrt(variance);

  // avgLength is > 0 here: every counted sentence has at least one character.
  const coefficientVariation = standardDev / avgLength;
  const uniform = coefficientVariation < 0.3; // Arbitrary threshold

  return {
    uniform,
    variance: coefficientVariation,
    avgLength,
    standardDev,
    sentenceCount
  };
}

/**
 * Applies subtle word-level substitutions.
 *
 * Each pattern is applied to the whole text (all occurrences) with probability
 * `intensity * pattern.probability`. One modification is counted per pattern
 * that actually changed the text.
 *
 * @param {string} text - Text to process. Non-string input is returned untouched.
 * @param {number} intensity - Variation intensity; scales every pattern probability.
 * @returns {object} - { content, modifications }
 */
function addMicroVariations(text, intensity) {
  if (typeof text !== 'string') {
    return { content: text, modifications: 0 };
  }

  // Every multi-word pattern ends on a word boundary so that it cannot eat the
  // beginning of the following word ("beaucoup demander" must stay untouched).
  const microPatterns = [
    { from: /\btrès (.+?)\b/g, to: 'particulièrement $1', probability: 0.3 },
    { from: /\bassez (.+?)\b/g, to: 'plutôt $1', probability: 0.4 },
    { from: /\bbeaucoup de\b/g, to: 'de nombreux', probability: 0.3 },
    { from: /\bpermets de\b/g, to: 'permet de', probability: 0.8 }, // Frequent correction
    // `\b` followed by "en effet" (the former `\bien effet` could never match)
    { from: /\ben effet\b/g, to: 'effectivement', probability: 0.2 }
  ];

  let modified = text;
  let modifications = 0;

  for (const pattern of microPatterns) {
    if (Math.random() < (intensity * pattern.probability)) {
      const before = modified;
      modified = modified.replace(pattern.from, pattern.to);

      if (modified !== before) {
        modifications++;
        logSh(`     🔧 Micro-variation: ${pattern.from} → ${pattern.to}`, 'DEBUG');
      }
    }
  }

  return {
    content: modified,
    modifications
  };
}

// ============= EXPORTS =============
// Public API (CommonJS): the main entry point (varyStructures), the individual
// transformation stages it chains, the analysis helpers and the pattern table.
// detectUniformLengths and addMicroVariations are exported but are not called
// by varyStructures in this file.
module.exports = {
  varyStructures,
  splitLongSentences,
  mergeShorter,
  reorganizeStructures,
  fixRepetitiveStarts,
  analyzeSentenceStructure,
  detectUniformLengths,
  addMicroVariations,
  LLM_SYNTAX_PATTERNS
};