// seogeneratorserver/lib/post-processing/SentenceVariation.js

// ========================================
// PATTERN BREAKING - TECHNIQUE 1: SENTENCE VARIATION
// Responsabilité: Varier les longueurs de phrases pour casser l'uniformité
// Anti-détection: Éviter patterns syntaxiques réguliers des LLMs
// ========================================

const { logSh } = require('../ErrorReporting');
const { tracer } = require('../trace');

/**
 * MAIN ENTRY POINT - sentence length variation.
 *
 * Walks every `tag -> text` entry of `input.content`. Entries that are not
 * strings, or that `shouldSkipElement` flags, are copied through untouched;
 * all others go through `varyTextStructure`.
 *
 * @param {Object} input - { content: {}, config: {}, context: {} }
 * @param {Object} input.content - Map of tag -> text to process.
 * @param {Object} [input.config] - Optional tuning: `intensity` (default 0.3),
 *   `splitThreshold` (default 100), `mergeThreshold` (default 30),
 *   `preserveQuestions` (default true), `preserveTitles` (default true).
 * @returns {Promise<Object>} Whatever `tracer.run` resolves to; the traced
 *   callback itself returns { content: {}, stats: {}, debug: {} }.
 * @throws {Error} `SentenceVariation failed: <message>` when processing an
 *   element throws; the original error is attached as `cause`.
 */
async function applySentenceVariation(input) {
  return await tracer.run('SentenceVariation.applySentenceVariation()', async () => {
    const { content, config = {} } = input;

    const {
      intensity = 0.3,           // Probability of applying a modification (30%)
      splitThreshold = 100,      // Sentence length (chars) above which a split is attempted
      mergeThreshold = 30,       // Sentence length (chars) below which a merge is attempted
      preserveQuestions = true,  // Leave FAQ questions untouched
      preserveTitles = true      // Leave titles untouched
    } = config;

    const elementsCount = Object.keys(content).length;

    await tracer.annotate({
      technique: 'sentence_variation',
      intensity,
      elementsCount
    });

    const startTime = Date.now();
    logSh(`📐 TECHNIQUE 1/3: Variation longueur phrases (intensité: ${intensity})`, 'INFO');
    logSh(`   📊 ${elementsCount} éléments à analyser`, 'DEBUG');

    try {
      const results = {};
      const modificationsDetails = [];
      let totalProcessed = 0;
      let totalModified = 0;

      // Process each content element
      for (const [tag, text] of Object.entries(content)) {
        totalProcessed++;

        // Non-string values cannot be analysed; pass them through instead of
        // letting a single odd entry abort the whole run.
        if (typeof text !== 'string') {
          results[tag] = text;
          logSh(`   ⏭️ [${tag}]: Préservé (contenu non textuel)`, 'DEBUG');
          continue;
        }

        // Skip elements protected by the configuration
        const skipConfig = { preserveQuestions, preserveTitles };
        if (shouldSkipElement(tag, text, skipConfig)) {
          results[tag] = text;
          logSh(`   ⏭️ [${tag}]: Préservé (${getSkipReason(tag, text, skipConfig)})`, 'DEBUG');
          continue;
        }

        // Apply the variation to eligible elements
        const variationResult = varyTextStructure(text, {
          intensity,
          splitThreshold,
          mergeThreshold,
          tag
        });

        results[tag] = variationResult.text;

        if (variationResult.modified) {
          totalModified++;
          modificationsDetails.push({
            tag,
            modifications: variationResult.modifications,
            originalLength: text.length,
            newLength: variationResult.text.length
          });

          logSh(`   ✏️ [${tag}]: ${variationResult.modifications.length} modifications`, 'DEBUG');
        } else {
          logSh(`   ➡️ [${tag}]: Aucune modification`, 'DEBUG');
        }
      }

      const duration = Date.now() - startTime;
      const stats = {
        processed: totalProcessed,
        modified: totalModified,
        // Guard the 0/0 case so empty content reports 0% instead of NaN.
        modificationRate: totalProcessed > 0
          ? Math.round((totalModified / totalProcessed) * 100)
          : 0,
        duration,
        technique: 'sentence_variation'
      };

      logSh(`✅ VARIATION PHRASES: ${stats.modified}/${stats.processed} éléments modifiés (${stats.modificationRate}%) en ${duration}ms`, 'INFO');

      await tracer.event('Sentence variation terminée', stats);

      return {
        content: results,
        stats,
        debug: {
          technique: 'sentence_variation',
          config: { intensity, splitThreshold, mergeThreshold },
          modifications: modificationsDetails
        }
      };

    } catch (error) {
      const duration = Date.now() - startTime;
      logSh(`❌ VARIATION PHRASES échouée après ${duration}ms: ${error.message}`, 'ERROR');
      // Keep the original error reachable for debugging via `cause`.
      throw new Error(`SentenceVariation failed: ${error.message}`, { cause: error });
    }
  }, input);
}

/**
 * Apply structural variation (sentence splits and merges) to one text.
 *
 * The text is segmented with `splitIntoSentences`; long sentences may be split
 * with `splitLongSentence` and adjacent short ones merged with
 * `mergeSentences`. Each candidate is changed with probability `intensity`
 * (compared against `Math.random()`).
 *
 * When nothing was changed, the ORIGINAL text is returned verbatim: rebuilding
 * it from the segmented sentences would alter text that is reported as
 * unmodified.
 *
 * @param {string} text - Text to process.
 * @param {Object} config
 * @param {number} config.intensity - Probability (0-1) of applying each candidate change.
 * @param {number} config.splitThreshold - Sentences longer than this (chars) may be split.
 * @param {number} config.mergeThreshold - Adjacent sentences both shorter than this (chars) may be merged.
 * @returns {{text: string, modified: boolean, modifications: Object[]}}
 *   The resulting text, whether anything changed, and one
 *   `{ type, original, result }` preview entry per applied change.
 */
function varyTextStructure(text, config) {
  const { intensity, splitThreshold, mergeThreshold } = config;

  // Non-string or very short input is never varied.
  if (typeof text !== 'string' || text.length < 50) {
    return { text, modified: false, modifications: [] };
  }

  const sentences = [...splitIntoSentences(text)];

  if (sentences.length < 2) {
    return { text, modified: false, modifications: [] };
  }

  const modifications = [];

  // TECHNIQUE 1: SPLIT long sentences
  for (let i = 0; i < sentences.length; i++) {
    const sentence = sentences[i];

    if (sentence.length > splitThreshold && Math.random() < intensity) {
      const splitResult = splitLongSentence(sentence);
      if (splitResult.success) {
        sentences.splice(i, 1, splitResult.part1, splitResult.part2);
        modifications.push({
          type: 'split',
          original: sentence.substring(0, 50) + '...',
          result: `${splitResult.part1.substring(0, 25)}... | ${splitResult.part2.substring(0, 25)}...`
        });
        i++; // Step over the freshly inserted second half
      }
    }
  }

  // TECHNIQUE 2: MERGE adjacent short sentences
  for (let i = 0; i < sentences.length - 1; i++) {
    const current = sentences[i];
    const next = sentences[i + 1];

    if (current.length < mergeThreshold && next.length < mergeThreshold && Math.random() < intensity) {
      const merged = mergeSentences(current, next);
      if (merged.success) {
        sentences.splice(i, 2, merged.result);
        modifications.push({
          type: 'merge',
          original: `${current.substring(0, 20)}... + ${next.substring(0, 20)}...`,
          result: merged.result.substring(0, 50) + '...'
        });
      }
    }
  }

  // Nothing applied: hand back the untouched original text.
  if (modifications.length === 0) {
    return { text, modified: false, modifications };
  }

  return {
    text: sentences.join(' ').trim(),
    modified: true,
    modifications
  };
}

/**
 * Split a text into sentences, keeping each sentence's terminal punctuation.
 *
 * A boundary is a run of whitespace that directly follows `.`, `!` or `?`,
 * unless those characters close a short capitalised abbreviation (one
 * uppercase + one lowercase letter + period, e.g. "Dr." or "Mr.").
 * Keeping the terminators means that joining the returned sentences with a
 * single space reproduces the text up to whitespace normalisation.
 *
 * @param {string} text - Text to segment.
 * @returns {string[]} Trimmed, non-empty sentences in their original order;
 *   an empty array for non-string input.
 */
function splitIntoSentences(text) {
  if (typeof text !== 'string') {
    return [];
  }

  // Split on the whitespace only, so the punctuation stays attached to the
  // sentence it ends. The negative lookbehind skips two-letter abbreviations.
  return text
    .split(/(?<=[.!?])(?<!\b[A-Z][a-z]\.)\s+/)
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0);
}

/**
 * Split one long sentence in two at a natural French connector.
 *
 * Every occurrence of each connector (", et ", ", mais ", …) located at or
 * after `middle - 50` characters is considered, and the one closest to the
 * middle wins. The connector itself is dropped: the first half is closed with
 * a period and the second half starts with an uppercase letter.
 *
 * @param {string} sentence - Sentence to split.
 * @returns {{success: boolean, part1?: string, part2?: string}}
 *   `{ success: true, part1, part2 }` when a usable break point exists,
 *   otherwise `{ success: false }`.
 */
function splitLongSentence(sentence) {
  if (typeof sentence !== 'string') {
    return { success: false };
  }

  // Natural break points
  const breakPoints = [
    ', et ',
    ', mais ',
    ', car ',
    ', donc ',
    ', ainsi ',
    ', alors ',
    ', tandis que ',
    ', bien que '
  ];

  const idealBreak = sentence.length / 2;
  const searchStart = Math.max(0, Math.floor(idealBreak - 50));
  // Leave at least ~20 characters after the break so part2 is not a stub.
  const maxIndex = sentence.length - 20;

  let bestBreak = null;
  let bestDistance = Infinity;

  for (const breakPoint of breakPoints) {
    // Scan every occurrence, not just the first, to find the one nearest the middle.
    let index = sentence.indexOf(breakPoint, searchStart);
    while (index !== -1) {
      if (index > 0 && index < maxIndex) {
        const distance = Math.abs(index - idealBreak);
        if (distance < bestDistance) {
          bestDistance = distance;
          bestBreak = { index, breakPoint };
        }
      }
      index = sentence.indexOf(breakPoint, index + 1);
    }
  }

  if (!bestBreak) {
    return { success: false };
  }

  // End the first half as a real sentence instead of leaving a dangling comma.
  const head = sentence.substring(0, bestBreak.index).replace(/[\s,;:]+$/, '');
  const part1 = `${head}.`;

  const tail = sentence.substring(bestBreak.index + bestBreak.breakPoint.length).trim();
  // Make sure the second half starts with an uppercase letter
  const part2 = tail.charAt(0).toUpperCase() + tail.slice(1);

  return {
    success: true,
    part1,
    part2
  };
}

/**
 * Merge two short sentences into one, joined by ", <connector> ".
 *
 * The connector is picked at random from a fixed French list. Terminal
 * punctuation of the first sentence is removed; the second sentence is
 * lower-cased unless it appears to start with a proper noun (two consecutive
 * capitalised words, accented letters included).
 *
 * @param {string} sentence1 - First sentence.
 * @param {string} sentence2 - Second sentence.
 * @returns {{success: boolean, result: string}} `result` is the merged text;
 *   `success` is false when either operand is empty or not a string, or when
 *   the merged text reaches 200 characters.
 */
function mergeSentences(sentence1, sentence2) {
  if (typeof sentence1 !== 'string' || typeof sentence2 !== 'string') {
    return { success: false, result: '' };
  }

  // Connectors for a natural-sounding merge
  const connectors = [
    'et',
    'puis',
    'aussi',
    'également',
    'de plus'
  ];

  // Pick a random connector
  const connector = connectors[Math.floor(Math.random() * connectors.length)];

  // Trim first so trailing whitespace cannot hide the terminal punctuation.
  const cleaned1 = sentence1.trim().replace(/\s*[.!?]+$/, '').trim();
  let cleaned2 = sentence2.trim();

  // Lower-case sentence2 unless it starts with a proper noun. Unicode
  // categories are used so accented capitals (É, À, …) are recognised.
  if (!/^\p{Lu}\p{Ll}*\s+\p{Lu}/u.test(cleaned2)) {
    cleaned2 = cleaned2.charAt(0).toLowerCase() + cleaned2.slice(1);
  }

  const merged = `${cleaned1}, ${connector} ${cleaned2}`;

  return {
    // Reject empty operands and avoid overly long sentences
    success: cleaned1.length > 0 && cleaned2.length > 0 && merged.length < 200,
    result: merged
  };
}

/**
 * Decide whether a content element must be left untouched.
 *
 * Rules are evaluated lazily, in order, and the first match wins:
 *   1. titles (tag contains 'Titre', 'H1' or 'H2') when `preserveTitles` is set;
 *   2. questions (tag contains 'Faq_q' or text contains '?') when
 *      `preserveQuestions` is set;
 *   3. texts shorter than 50 characters.
 *
 * @param {string} tag - Element tag.
 * @param {string} text - Element text.
 * @param {Object} config - { preserveQuestions, preserveTitles }
 * @returns {boolean} true when the element should be skipped.
 */
function shouldSkipElement(tag, text, config) {
  const titleMarkers = ['Titre', 'H1', 'H2'];

  // Each rule is a thunk so later checks only run if earlier ones did not match.
  const skipRules = [
    () => config.preserveTitles && titleMarkers.some((marker) => tag.includes(marker)),
    () => config.preserveQuestions && (tag.includes('Faq_q') || text.includes('?')),
    () => text.length < 50
  ];

  return skipRules.some((rule) => Boolean(rule()));
}

/**
 * Describe, for debug logs, why an element is skipped.
 *
 * The checks run in this order: title, question, short text. The optional
 * `config` lets the caller disable the title and/or question checks so that an
 * element skipped only for its length is not reported as 'titre' or
 * 'question'. With no `config`, both checks are enabled.
 *
 * @param {string} tag - Element tag.
 * @param {string} text - Element text.
 * @param {Object} [config] - { preserveQuestions, preserveTitles }; each flag
 *   defaults to true when omitted.
 * @returns {string} 'titre', 'question', 'trop court' or 'autre'.
 */
function getSkipReason(tag, text, config = {}) {
  const { preserveQuestions = true, preserveTitles = true } = config || {};

  if (preserveTitles && (tag.includes('Titre') || tag.includes('H1') || tag.includes('H2'))) {
    return 'titre';
  }
  if (preserveQuestions && (tag.includes('Faq_q') || text.includes('?'))) {
    return 'question';
  }
  if (text.length < 50) {
    return 'trop court';
  }
  return 'autre';
}

/**
 * Measure how uniform the sentence lengths of a text are.
 *
 * Uniformity is computed as 1 / (1 + stdDev / mean) over the sentence lengths
 * (in characters), so identical lengths give 1. A text is flagged as needing
 * variation when uniformity exceeds 0.7.
 *
 * @param {string} text - Text to analyse.
 * @returns {Object} `{ needsVariation: false, patterns: [] }` when fewer than
 *   two sentences are found; otherwise `{ needsVariation, patterns }` where
 *   `patterns` holds the rounded `avgLength`, `uniformity` (as a percentage),
 *   `sentenceCount` and `variance`.
 */
function analyzeSentencePatterns(text) {
  const sentences = splitIntoSentences(text);
  const sentenceCount = sentences.length;

  if (sentenceCount < 2) {
    return { needsVariation: false, patterns: [] };
  }

  // Mean sentence length
  let totalLength = 0;
  for (const sentence of sentences) {
    totalLength += sentence.length;
  }
  const avgLength = totalLength / sentenceCount;

  // Population variance of the lengths (low variance = high uniformity)
  let squaredDeviationSum = 0;
  for (const sentence of sentences) {
    squaredDeviationSum += Math.pow(sentence.length - avgLength, 2);
  }
  const variance = squaredDeviationSum / sentenceCount;

  const relativeSpread = Math.sqrt(variance) / avgLength;
  const uniformity = 1 / (1 + relativeSpread); // 0-1, 1 = perfectly uniform

  const patterns = {
    avgLength: Math.round(avgLength),
    uniformity: Math.round(uniformity * 100),
    sentenceCount,
    variance: Math.round(variance)
  };

  // Above 0.7 the text is considered problematically uniform
  return { needsVariation: uniformity > 0.7, patterns };
}

// Public API of this module. applySentenceVariation is the main entry point;
// the other exports are standalone helpers defined in this file.
// shouldSkipElement and getSkipReason are intentionally not part of the
// export list below and stay module-private.
module.exports = {
  applySentenceVariation,    // ← MAIN ENTRY POINT
  varyTextStructure,
  splitIntoSentences,
  splitLongSentence,
  mergeSentences,
  analyzeSentencePatterns
};