// seo-generator-server/lib/validation/SamplingEngine.js

/**
 * SamplingEngine.js
 *
 * Moteur d'échantillonnage pour Pipeline Validator
 * Extrait automatiquement des échantillons représentatifs du contenu généré
 */

const { logSh } = require('../ErrorReporting');
const { tracer } = require('../trace');
const fs = require('fs').promises;
const path = require('path');

/**
 * Sampling engine for the Pipeline Validator.
 *
 * Picks sample tags (titles, content, FAQ) from the final saved version and
 * collects the value of each picked tag in every saved version file.
 */
class SamplingEngine {
  constructor() {
    // Tags sampled so far, grouped by category. extractSamples() appends to
    // these lists; they are only emptied by reset().
    this.samples = {
      titles: [],
      content: [],
      faqs: []
    };
  }

  /**
   * Extracts the samples from the saved version files.
   *
   * The first path containing "v2.0.json" is treated as the final version and
   * its keys define the candidate tags. Every version file is loaded at most
   * once, and each sampled tag belongs to exactly one category, so
   * `summary.totalSamples` always equals the number of entries in `samples`.
   *
   * @param {Array<string>} versionPaths - Paths of the version JSON files
   * @returns {Promise<Object>} `{ samples, summary }` where `samples` maps each
   *   tag to `{ tag, versions, type }` and `summary` holds the per-category counts
   * @throws {Error} When no path contains "v2.0.json", or when the final
   *   version cannot be loaded (the loadVersion error is propagated)
   */
  async extractSamples(versionPaths) {
    return tracer.run('SamplingEngine.extractSamples', async () => {
      logSh(`📊 Démarrage échantillonnage: ${versionPaths.length} versions`, 'INFO');

      // The final version decides which tags can be sampled
      const finalVersionPath = versionPaths.find(p => p.includes('v2.0.json'));
      if (!finalVersionPath) {
        throw new Error('Version finale v2.0.json introuvable');
      }

      const finalContent = await this.loadVersion(finalVersionPath);
      const allTags = Object.keys(finalContent);

      logSh(`   📋 ${allTags.length} balises trouvées dans version finale`, 'DEBUG');

      // Resolve every sampled tag to a single category
      const typeByTag = this._categorizeTags(allTags);
      const counts = { title: 0, content: 0, faq: 0 };
      for (const type of typeByTag.values()) {
        counts[type] += 1;
      }

      logSh(`   ✓ Catégorisation: ${counts.title} titres, ${counts.content} contenus, ${counts.faq} FAQ`, 'INFO');

      // Read each version file once instead of once per sampled tag
      const preloaded = await this._preloadVersions(versionPaths, finalVersionPath, finalContent);

      const listByType = { title: 'titles', content: 'content', faq: 'faqs' };
      const samplesData = {};

      for (const [tag, type] of typeByTag) {
        const sample = await this.extractVersionsForTag(tag, versionPaths, preloaded);
        sample.type = type;
        samplesData[tag] = sample;
        this.samples[listByType[type]].push(tag);
      }

      const totalSamples = typeByTag.size;
      logSh(`✅ Échantillonnage terminé: ${totalSamples} échantillons extraits`, 'INFO');

      return {
        samples: samplesData,
        summary: {
          totalSamples,
          titles: counts.title,
          content: counts.content,
          faqs: counts.faq
        }
      };

    }, { versionsCount: versionPaths.length });
  }

  /**
   * Assigns one category to every tag that should be sampled.
   *
   * Titles are tags containing "T"; content is the first 4 tags containing
   * "MC" or "L"; FAQ is the first 4 tags containing "FAQ". A tag matching
   * several filters keeps the last matching category (faq over content over
   * title), which is the type the sample ended up with when categories were
   * processed one after the other.
   *
   * @param {Array<string>} allTags - Tag names of the final version
   * @returns {Map<string, string>} Tag -> 'title' | 'content' | 'faq', in
   *   first-selection order
   */
  _categorizeTags(allTags) {
    const titleTags = allTags.filter(tag => tag.includes('T'));
    const contentTags = allTags.filter(tag => tag.includes('MC') || tag.includes('L')).slice(0, 4);
    const faqTags = allTags.filter(tag => tag.includes('FAQ')).slice(0, 4);

    // Map.set on an existing key keeps its position and replaces the value
    const typeByTag = new Map();
    for (const tag of titleTags) {
      typeByTag.set(tag, 'title');
    }
    for (const tag of contentTags) {
      typeByTag.set(tag, 'content');
    }
    for (const tag of faqTags) {
      typeByTag.set(tag, 'faq');
    }
    return typeByTag;
  }

  /**
   * Loads every version file once and remembers the outcome per path.
   *
   * @param {Array<string>} versionPaths - Paths of the version JSON files
   * @param {string} knownPath - Path whose content is already in memory
   * @param {Object} knownContent - Parsed content of `knownPath`
   * @returns {Promise<Map<string, Object>>} Path -> `{ content }` on success or
   *   `{ error }` when the file could not be loaded
   */
  async _preloadVersions(versionPaths, knownPath, knownContent) {
    const preloaded = new Map([[knownPath, { content: knownContent }]]);

    for (const versionPath of versionPaths) {
      if (preloaded.has(versionPath)) {
        continue;
      }
      try {
        preloaded.set(versionPath, { content: await this.loadVersion(versionPath) });
      } catch (error) {
        // Best effort: an unreadable version is reported once and shows up
        // as "[Erreur lecture]" in every sample
        logSh(`⚠️ Erreur lecture version ${versionPath}: ${error.message}`, 'WARN');
        preloaded.set(versionPath, { error });
      }
    }

    return preloaded;
  }

  /**
   * Collects the value of one tag in every version file.
   *
   * @param {string} tag - Tag to extract
   * @param {Array<string>} versionPaths - Paths of the version JSON files
   * @param {Map<string, Object>} [preloaded] - Optional results from
   *   _preloadVersions(); paths missing from it (or all paths when omitted)
   *   are loaded with loadVersion()
   * @returns {Promise<Object>} `{ tag, versions }` where `versions` maps the
   *   file base name (without ".json") to the tag value, to
   *   "[Non disponible à cette étape]" when the value is missing or falsy, or
   *   to "[Erreur lecture]" when the version could not be read
   */
  async extractVersionsForTag(tag, versionPaths, preloaded = null) {
    const versions = {};

    for (const versionPath of versionPaths) {
      const versionName = path.basename(versionPath, '.json');
      const cached = preloaded ? preloaded.get(versionPath) : undefined;

      if (cached && cached.error) {
        // Failure already logged when the file was preloaded
        versions[versionName] = "[Erreur lecture]";
        continue;
      }

      try {
        const content = cached ? cached.content : await this.loadVersion(versionPath);

        // Store this tag's content for this version
        versions[versionName] = content[tag] || "[Non disponible à cette étape]";

      } catch (error) {
        logSh(`⚠️ Erreur lecture version ${versionPath}: ${error.message}`, 'WARN');
        versions[versionName] = "[Erreur lecture]";
      }
    }

    return {
      tag,
      versions
    };
  }

  /**
   * Reads and parses one version JSON file.
   *
   * @param {string} versionPath - Path of the file
   * @returns {Promise<Object>} Parsed JSON content
   * @throws {Error} Read or parse errors are logged, then rethrown unchanged
   */
  async loadVersion(versionPath) {
    try {
      const data = await fs.readFile(versionPath, 'utf8');
      return JSON.parse(data);
    } catch (error) {
      logSh(`❌ Erreur chargement version ${versionPath}: ${error.message}`, 'ERROR');
      throw error;
    }
  }

  /**
   * Writes the samples to a file as JSON indented with 2 spaces.
   *
   * @param {Object} samplesData - Samples data to serialize
   * @param {string} outputPath - Destination file path
   * @returns {Promise<void>}
   * @throws {Error} Serialization or write errors are logged, then rethrown unchanged
   */
  async saveSamples(samplesData, outputPath) {
    try {
      await fs.writeFile(outputPath, JSON.stringify(samplesData, null, 2), 'utf8');
      logSh(`💾 Échantillons sauvegardés: ${outputPath}`, 'DEBUG');
    } catch (error) {
      logSh(`❌ Erreur sauvegarde échantillons: ${error.message}`, 'ERROR');
      throw error;
    }
  }

  /**
   * Returns the sampled tags per category and their total count.
   *
   * @returns {Object} `{ titles, content, faqs, total }`
   */
  getSummary() {
    return {
      titles: this.samples.titles,
      content: this.samples.content,
      faqs: this.samples.faqs,
      total: this.samples.titles.length + this.samples.content.length + this.samples.faqs.length
    };
  }

  /**
   * Clears the recorded sample tags by replacing the lists with new empty ones.
   */
  reset() {
    this.samples = {
      titles: [],
      content: [],
      faqs: []
    };
  }
}

// Export the class as a named property of the module's exports object.
module.exports = { SamplingEngine };