seogeneratorserver/lib/adversarial-generation/AdversarialUtils.js

// ========================================
// ADVERSARIAL UTILS - MODULAR UTILITIES
// Responsibility: utility functions shared by all adversarial modules
// Architecture: reusable, composable helper functions
// ========================================
const { logSh } = require('../ErrorReporting');
/**
 * CONTENT ANALYZERS
 */
/**
 * Analyze lexical diversity (unique words / total words, as a percentage)
 */
function analyzeLexicalDiversity(content) {
  if (!content || typeof content !== 'string') return 0;
  const words = content.toLowerCase()
    .split(/\s+/)
    .filter(word => word.length > 2)
    .map(word => word.replace(/[^\w]/g, ''));
  if (words.length === 0) return 0;
  const uniqueWords = [...new Set(words)];
  return (uniqueWords.length / words.length) * 100;
}
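// Illustrative example: repeated words lower the score and tokens of 1-2 characters
// are ignored by the length filter.
//   analyzeLexicalDiversity('the cat sat on the mat')
//   // → 80 ("on" is dropped, "the" appears twice: 4 unique / 5 counted words)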
/**
 * Analyze variation in sentence lengths (coefficient of variation, capped at 100)
 */
function analyzeSentenceVariation(content) {
  if (!content || typeof content !== 'string') return 0;
  const sentences = content.split(/[.!?]+/)
    .map(s => s.trim())
    .filter(s => s.length > 5);
  if (sentences.length < 2) return 0;
  const lengths = sentences.map(s => s.split(/\s+/).length);
  const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const variance = lengths.reduce((acc, len) => acc + Math.pow(len - avgLength, 2), 0) / lengths.length;
  const stdDev = Math.sqrt(variance);
  return Math.min(100, (stdDev / avgLength) * 100);
}
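// Illustrative example: sentences of identical length score 0, mixed lengths score higher.
//   analyzeSentenceVariation('One two three four. Five six.')
//   // lengths [4, 2] → mean 3, std dev 1 → ≈ 33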
/**
 * Detect typical AI fingerprint words, phrases and overused connectors
 */
function detectAIFingerprints(content) {
  const aiFingerprints = {
    words: ['optimal', 'comprehensive', 'seamless', 'robust', 'leverage', 'cutting-edge', 'state-of-the-art', 'furthermore', 'moreover'],
    phrases: ['it is important to note', 'it should be noted', 'it is worth mentioning', 'in conclusion', 'to summarize'],
    connectors: ['par ailleurs', 'en effet', 'de plus', 'cependant', 'ainsi', 'donc']
  };
  const results = {
    words: 0,
    phrases: 0,
    connectors: 0,
    totalScore: 0
  };
  if (!content || typeof content !== 'string') return results;
  const lowerContent = content.toLowerCase();
  // Count AI-typical words
  aiFingerprints.words.forEach(word => {
    const matches = (lowerContent.match(new RegExp(`\\b${word}\\b`, 'g')) || []);
    results.words += matches.length;
  });
  // Count typical phrases
  aiFingerprints.phrases.forEach(phrase => {
    if (lowerContent.includes(phrase)) {
      results.phrases += 1;
    }
  });
  // Count repeated connectors
  aiFingerprints.connectors.forEach(connector => {
    const matches = (lowerContent.match(new RegExp(`\\b${connector}\\b`, 'g')) || []);
    if (matches.length > 1) {
      results.connectors += matches.length - 1; // repetition penalty
    }
  });
  // Total score (out of 100)
  const wordCount = content.split(/\s+/).length;
  results.totalScore = Math.min(100,
    (results.words * 5 + results.phrases * 10 + results.connectors * 3) / Math.max(wordCount, 1) * 100
  );
  return results;
}
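// Illustrative scoring example: each fingerprint word weighs 5, each phrase 10,
// each repeated connector 3, normalized by word count.
//   A 40-word paragraph containing "robust" and "seamless" plus "it is important to note"
//   // → totalScore = (2*5 + 1*10) / 40 * 100 = 50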
/**
 * Analyze structural uniformity across sentences
 */
function analyzeStructuralUniformity(content) {
  if (!content || typeof content !== 'string') return 0;
  const sentences = content.split(/[.!?]+/)
    .map(s => s.trim())
    .filter(s => s.length > 5);
  if (sentences.length < 3) return 0;
  const structures = sentences.map(sentence => {
    const words = sentence.split(/\s+/);
    return {
      length: words.length,
      startsWithConnector: /^(par ailleurs|en effet|de plus|cependant|ainsi|donc|ensuite|puis)/i.test(sentence),
      hasComma: sentence.includes(','),
      hasSubordinate: /qui|que|dont|où|quand|comme|parce que|puisque|bien que/i.test(sentence)
    };
  });
  // Compute uniformity
  const avgLength = structures.reduce((sum, s) => sum + s.length, 0) / structures.length;
  const lengthVariance = structures.reduce((sum, s) => sum + Math.pow(s.length - avgLength, 2), 0) / structures.length;
  const connectorRatio = structures.filter(s => s.startsWithConnector).length / structures.length;
  const commaRatio = structures.filter(s => s.hasComma).length / structures.length;
  // The more uniform the text, the higher the score (bad for anti-detection)
  const uniformityScore = 100 - (Math.sqrt(lengthVariance) / avgLength * 100) -
    (Math.abs(0.3 - connectorRatio) * 50) - (Math.abs(0.5 - commaRatio) * 30);
  return Math.max(0, Math.min(100, uniformityScore));
}
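// Reading the score (illustrative): identical sentence lengths with roughly 30% connector
// openings and 50% comma usage yield ~100 (maximally uniform), while strongly varied
// sentence lengths drive the score toward 0.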
/**
 * CONTENT COMPARATORS
 */
/**
 * Compare two contents and compute the modification rate
 */
function compareContentModification(original, modified) {
  if (!original || !modified) return 0;
  const originalWords = original.toLowerCase().split(/\s+/).filter(w => w.length > 2);
  const modifiedWords = modified.toLowerCase().split(/\s+/).filter(w => w.length > 2);
  // Approximate word-level difference (position-by-position comparison, not a true edit distance)
  let changes = 0;
  const maxLength = Math.max(originalWords.length, modifiedWords.length);
  if (maxLength === 0) return 0;
  for (let i = 0; i < maxLength; i++) {
    if (originalWords[i] !== modifiedWords[i]) {
      changes++;
    }
  }
  return (changes / maxLength) * 100;
}
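// Illustrative example: the comparison is positional, so a single substituted word in
// otherwise identical text yields a low rate.
//   compareContentModification('the cat sat quietly', 'the dog sat quietly')
//   // words > 2 chars: [the, cat, sat, quietly] vs [the, dog, sat, quietly] → 1/4 → 25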
/**
 * Evaluate adversarial improvement between an original and a modified version
 */
function evaluateAdversarialImprovement(original, modified, detectorTarget = 'general') {
  const originalFingerprints = detectAIFingerprints(original);
  const modifiedFingerprints = detectAIFingerprints(modified);
  const originalDiversity = analyzeLexicalDiversity(original);
  const modifiedDiversity = analyzeLexicalDiversity(modified);
  const originalVariation = analyzeSentenceVariation(original);
  const modifiedVariation = analyzeSentenceVariation(modified);
  const fingerprintReduction = originalFingerprints.totalScore - modifiedFingerprints.totalScore;
  const diversityIncrease = modifiedDiversity - originalDiversity;
  const variationIncrease = modifiedVariation - originalVariation;
  // Weighted composite: fingerprint reduction counts most, then diversity and variation gains
  const improvementScore = (
    fingerprintReduction * 0.4 +
    diversityIncrease * 0.3 +
    variationIncrease * 0.3
  );
  return {
    fingerprintReduction,
    diversityIncrease,
    variationIncrease,
    improvementScore: Math.round(improvementScore * 100) / 100,
    modificationRate: compareContentModification(original, modified),
    recommendation: getImprovementRecommendation(improvementScore, detectorTarget)
  };
}
/**
 * CONTENT UTILITIES
 */
/**
 * Clean generated adversarial content
 */
function cleanAdversarialContent(content) {
  if (!content || typeof content !== 'string') return content;
  let cleaned = content;
  // Remove generation prefixes
  cleaned = cleaned.replace(/^(voici\s+)?le\s+contenu\s+(réécrit|amélioré|modifié)[:\s]*/gi, '');
  cleaned = cleaned.replace(/^(bon,?\s*)?(alors,?\s*)?(pour\s+)?(ce\s+contenu[,\s]*)?/gi, '');
  // Clean up formatting
  cleaned = cleaned.replace(/\*\*([^*]+)\*\*/g, '$1'); // strip Markdown bold markers, keep the text
  cleaned = cleaned.replace(/\s{2,}/g, ' '); // collapse multiple spaces
  cleaned = cleaned.replace(/([.!?])\s*([.!?])/g, '$1 '); // collapse doubled punctuation
  // Trim leading/trailing artifacts
  cleaned = cleaned.trim();
  cleaned = cleaned.replace(/^[,.\s]+/, '');
  cleaned = cleaned.replace(/[,\s]+$/, '');
  return cleaned;
}
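// Illustrative example (sample text invented): generation preamble and Markdown bold
// markers are stripped while the underlying text is preserved.
//   cleanAdversarialContent('Voici le contenu réécrit : **Les pompes à chaleur** réduisent la facture.')
//   // → 'Les pompes à chaleur réduisent la facture.'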
/**
 * Validate the quality of adversarial content
 */
function validateAdversarialContent(content, originalContent, minLength = 10, maxModificationRate = 90) {
  const validation = {
    isValid: true,
    issues: [],
    suggestions: []
  };
  // Check minimum length
  if (!content || content.length < minLength) {
    validation.isValid = false;
    validation.issues.push('Contenu trop court');
    validation.suggestions.push('Augmenter la longueur du contenu généré');
  }
  // Check consistency with the original
  if (originalContent) {
    const modificationRate = compareContentModification(originalContent, content);
    if (modificationRate > maxModificationRate) {
      validation.issues.push('Modification trop importante');
      validation.suggestions.push('Réduire l\'intensité adversariale pour préserver le sens');
    }
    if (modificationRate < 5) {
      validation.issues.push('Modification insuffisante');
      validation.suggestions.push('Augmenter l\'intensité adversariale');
    }
  }
  // Check residual AI fingerprints
  const fingerprints = detectAIFingerprints(content);
  if (fingerprints.totalScore > 15) {
    validation.issues.push('Empreintes IA encore présentes');
    validation.suggestions.push('Appliquer post-processing anti-fingerprints');
  }
  return validation;
}
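// Note on behavior (illustrative): only the length check flips isValid to false; the
// modification-rate and fingerprint checks record issues and suggestions without
// invalidating the content, e.g. a near-identical rewrite still returns
//   { isValid: true, issues: ['Modification insuffisante'], ... }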
/**
 * TECHNICAL UTILITIES
 */
/**
 * Chunk an array while preserving pairs
 */
function chunkArraySmart(array, size, preservePairs = false) {
  if (!preservePairs) {
    return chunkArray(array, size);
  }
  const chunks = [];
  let i = 0;
  while (i < array.length) {
    let chunk = array.slice(i, i + size);
    // If the cut falls in the middle of a pair (odd chunk length) and items remain, shrink by one
    if (size > 1 && chunk.length % 2 !== 0 && i + size < array.length) {
      chunk = array.slice(i, i + size - 1);
    }
    chunks.push(chunk);
    i += chunk.length; // advance by the actual chunk length so no element is skipped
  }
  return chunks;
}
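// Illustrative example: with pair preservation, odd cuts are shrunk so logical pairs
// stay in the same chunk.
//   chunkArraySmart([1, 2, 3, 4, 5, 6], 3, true)  // → [[1, 2], [3, 4], [5, 6]]
//   chunkArraySmart([1, 2, 3, 4, 5, 6], 3)        // → [[1, 2, 3], [4, 5, 6]]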
/**
 * Standard array chunking
 */
function chunkArray(array, size) {
  const chunks = [];
  for (let i = 0; i < array.length; i += size) {
    chunks.push(array.slice(i, i + size));
  }
  return chunks;
}
/**
 * Sleep with random variation (jitter)
 */
function sleep(ms, variation = 0.2) {
  const actualMs = ms + (Math.random() - 0.5) * ms * variation;
  return new Promise(resolve => setTimeout(resolve, Math.max(100, actualMs)));
}
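// Illustrative example: with the default variation of 0.2 the delay is jittered by ±10%,
// with a 100 ms floor.
//   await sleep(1000);  // resolves after roughly 900-1100 ms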
/**
 * RECOMMENDATIONS
 */
/**
 * Get an improvement recommendation for a given score and detector target
 */
function getImprovementRecommendation(score, detectorTarget) {
  const recommendations = {
    general: {
      good: "Bon niveau d'amélioration générale",
      medium: "Appliquer techniques de variation syntaxique",
      poor: "Nécessite post-processing intensif"
    },
    gptZero: {
      good: "Imprévisibilité suffisante contre GPTZero",
      medium: "Ajouter plus de ruptures narratives",
      poor: "Intensifier variation syntaxique et lexicale"
    },
    originality: {
      good: "Créativité suffisante contre Originality",
      medium: "Enrichir diversité sémantique",
      poor: "Réinventer présentation des informations"
    }
  };
  // Score thresholds: > 10 good, > 5 medium, otherwise poor
  const category = score > 10 ? 'good' : score > 5 ? 'medium' : 'poor';
  return recommendations[detectorTarget]?.[category] || recommendations.general[category];
}
/**
 * METRICS AND STATS
 */
/**
 * Compute a composite anti-detection score
 */
function calculateAntiDetectionScore(content, detectorTarget = 'general') {
  const diversity = analyzeLexicalDiversity(content);
  const variation = analyzeSentenceVariation(content);
  const fingerprints = detectAIFingerprints(content);
  const uniformity = analyzeStructuralUniformity(content);
  const baseScore = (diversity * 0.3 + variation * 0.3 + (100 - fingerprints.totalScore) * 0.2 + (100 - uniformity) * 0.2);
  // Detector-specific adjustments
  let adjustedScore = baseScore;
  switch (detectorTarget) {
    case 'gptZero':
      adjustedScore = baseScore * (variation / 100) * 1.2; // favors sentence variation
      break;
    case 'originality':
      adjustedScore = baseScore * (diversity / 100) * 1.2; // favors lexical diversity
      break;
  }
  return Math.min(100, Math.max(0, Math.round(adjustedScore)));
}
module.exports = {
  // Analyzers
  analyzeLexicalDiversity,
  analyzeSentenceVariation,
  detectAIFingerprints,
  analyzeStructuralUniformity,
  // Comparators
  compareContentModification,
  evaluateAdversarialImprovement,
  // Content utilities
  cleanAdversarialContent,
  validateAdversarialContent,
  // Technical utilities
  chunkArray,
  chunkArraySmart,
  sleep,
  // Metrics
  calculateAntiDetectionScore,
  getImprovementRecommendation
};
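// Usage sketch (illustrative; the module path and sample variables are assumptions,
// not taken from the calling code):
//   const {
//     evaluateAdversarialImprovement,
//     calculateAntiDetectionScore
//   } = require('./AdversarialUtils');
//
//   const report = evaluateAdversarialImprovement(originalDraft, rewrittenDraft, 'gptZero');
//   if (report.improvementScore <= 5) {
//     // report.recommendation suggests the next adversarial pass to apply
//   }
//   const score = calculateAntiDetectionScore(rewrittenDraft, 'gptZero');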