// NOTE(review): stray commit-message text was pasted at the top of this file
// by a tooling mishap; preserved as a comment so the file stays parseable:
// "Added plan.md with complete architecture for format-agnostic content generation:
//  Markdown/HTML/Plain Text/JSON support, FormatExporter module with neutral data
//  structure, integration with ContentAssembly and ArticleStorage, SEO metadata,
//  readability scoring, WordPress Gutenberg format; 4-phase roadmap (~6h)."
// ========================================
// ADVERSARIAL UTILS - MODULAR UTILITIES
// Responsibility: utility functions shared by all adversarial modules
// Architecture: reusable, composable helper functions
// ========================================
const { logSh } = require('../ErrorReporting');
|
|
|
|
/**
 * CONTENT ANALYZERS
 */
/**
|
|
* Analyser score de diversité lexicale
|
|
*/
|
|
function analyzeLexicalDiversity(content) {
|
|
if (!content || typeof content !== 'string') return 0;
|
|
|
|
const words = content.toLowerCase()
|
|
.split(/\s+/)
|
|
.filter(word => word.length > 2)
|
|
.map(word => word.replace(/[^\w]/g, ''));
|
|
|
|
if (words.length === 0) return 0;
|
|
|
|
const uniqueWords = [...new Set(words)];
|
|
return (uniqueWords.length / words.length) * 100;
|
|
}
|
|
|
|
/**
|
|
* Analyser variation des longueurs de phrases
|
|
*/
|
|
function analyzeSentenceVariation(content) {
|
|
if (!content || typeof content !== 'string') return 0;
|
|
|
|
const sentences = content.split(/[.!?]+/)
|
|
.map(s => s.trim())
|
|
.filter(s => s.length > 5);
|
|
|
|
if (sentences.length < 2) return 0;
|
|
|
|
const lengths = sentences.map(s => s.split(/\s+/).length);
|
|
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
|
|
const variance = lengths.reduce((acc, len) => acc + Math.pow(len - avgLength, 2), 0) / lengths.length;
|
|
const stdDev = Math.sqrt(variance);
|
|
|
|
return Math.min(100, (stdDev / avgLength) * 100);
|
|
}
|
|
|
|
/**
|
|
* Détecter mots typiques IA
|
|
*/
|
|
function detectAIFingerprints(content) {
|
|
const aiFingerprints = {
|
|
words: ['optimal', 'comprehensive', 'seamless', 'robust', 'leverage', 'cutting-edge', 'state-of-the-art', 'furthermore', 'moreover'],
|
|
phrases: ['it is important to note', 'it should be noted', 'it is worth mentioning', 'in conclusion', 'to summarize'],
|
|
connectors: ['par ailleurs', 'en effet', 'de plus', 'cependant', 'ainsi', 'donc']
|
|
};
|
|
|
|
const results = {
|
|
words: 0,
|
|
phrases: 0,
|
|
connectors: 0,
|
|
totalScore: 0
|
|
};
|
|
|
|
const lowerContent = content.toLowerCase();
|
|
|
|
// Compter mots IA
|
|
aiFingerprints.words.forEach(word => {
|
|
const matches = (lowerContent.match(new RegExp(`\\b${word}\\b`, 'g')) || []);
|
|
results.words += matches.length;
|
|
});
|
|
|
|
// Compter phrases typiques
|
|
aiFingerprints.phrases.forEach(phrase => {
|
|
if (lowerContent.includes(phrase)) {
|
|
results.phrases += 1;
|
|
}
|
|
});
|
|
|
|
// Compter connecteurs répétitifs
|
|
aiFingerprints.connectors.forEach(connector => {
|
|
const matches = (lowerContent.match(new RegExp(`\\b${connector}\\b`, 'g')) || []);
|
|
if (matches.length > 1) {
|
|
results.connectors += matches.length - 1; // Pénalité répétition
|
|
}
|
|
});
|
|
|
|
// Score total (sur 100)
|
|
const wordCount = content.split(/\s+/).length;
|
|
results.totalScore = Math.min(100,
|
|
(results.words * 5 + results.phrases * 10 + results.connectors * 3) / Math.max(wordCount, 1) * 100
|
|
);
|
|
|
|
return results;
|
|
}
|
|
|
|
/**
|
|
* Analyser uniformité structurelle
|
|
*/
|
|
function analyzeStructuralUniformity(content) {
|
|
const sentences = content.split(/[.!?]+/)
|
|
.map(s => s.trim())
|
|
.filter(s => s.length > 5);
|
|
|
|
if (sentences.length < 3) return 0;
|
|
|
|
const structures = sentences.map(sentence => {
|
|
const words = sentence.split(/\s+/);
|
|
return {
|
|
length: words.length,
|
|
startsWithConnector: /^(par ailleurs|en effet|de plus|cependant|ainsi|donc|ensuite|puis)/i.test(sentence),
|
|
hasComma: sentence.includes(','),
|
|
hasSubordinate: /qui|que|dont|où|quand|comme|parce que|puisque|bien que/i.test(sentence)
|
|
};
|
|
});
|
|
|
|
// Calculer uniformité
|
|
const avgLength = structures.reduce((sum, s) => sum + s.length, 0) / structures.length;
|
|
const lengthVariance = structures.reduce((sum, s) => sum + Math.pow(s.length - avgLength, 2), 0) / structures.length;
|
|
|
|
const connectorRatio = structures.filter(s => s.startsWithConnector).length / structures.length;
|
|
const commaRatio = structures.filter(s => s.hasComma).length / structures.length;
|
|
|
|
// Plus c'est uniforme, plus le score est élevé (mauvais pour anti-détection)
|
|
const uniformityScore = 100 - (Math.sqrt(lengthVariance) / avgLength * 100) -
|
|
(Math.abs(0.3 - connectorRatio) * 50) - (Math.abs(0.5 - commaRatio) * 30);
|
|
|
|
return Math.max(0, Math.min(100, uniformityScore));
|
|
}
|
|
|
|
/**
 * CONTENT COMPARATORS
 */
/**
|
|
* Comparer deux contenus et calculer taux de modification
|
|
*/
|
|
function compareContentModification(original, modified) {
|
|
if (!original || !modified) return 0;
|
|
|
|
const originalWords = original.toLowerCase().split(/\s+/).filter(w => w.length > 2);
|
|
const modifiedWords = modified.toLowerCase().split(/\s+/).filter(w => w.length > 2);
|
|
|
|
// Calcul de distance Levenshtein approximative (par mots)
|
|
let changes = 0;
|
|
const maxLength = Math.max(originalWords.length, modifiedWords.length);
|
|
|
|
for (let i = 0; i < maxLength; i++) {
|
|
if (originalWords[i] !== modifiedWords[i]) {
|
|
changes++;
|
|
}
|
|
}
|
|
|
|
return (changes / maxLength) * 100;
|
|
}
|
|
|
|
/**
|
|
* Évaluer amélioration adversariale
|
|
*/
|
|
function evaluateAdversarialImprovement(original, modified, detectorTarget = 'general') {
|
|
const originalFingerprints = detectAIFingerprints(original);
|
|
const modifiedFingerprints = detectAIFingerprints(modified);
|
|
|
|
const originalDiversity = analyzeLexicalDiversity(original);
|
|
const modifiedDiversity = analyzeLexicalDiversity(modified);
|
|
|
|
const originalVariation = analyzeSentenceVariation(original);
|
|
const modifiedVariation = analyzeSentenceVariation(modified);
|
|
|
|
const fingerprintReduction = originalFingerprints.totalScore - modifiedFingerprints.totalScore;
|
|
const diversityIncrease = modifiedDiversity - originalDiversity;
|
|
const variationIncrease = modifiedVariation - originalVariation;
|
|
|
|
const improvementScore = (
|
|
fingerprintReduction * 0.4 +
|
|
diversityIncrease * 0.3 +
|
|
variationIncrease * 0.3
|
|
);
|
|
|
|
return {
|
|
fingerprintReduction,
|
|
diversityIncrease,
|
|
variationIncrease,
|
|
improvementScore: Math.round(improvementScore * 100) / 100,
|
|
modificationRate: compareContentModification(original, modified),
|
|
recommendation: getImprovementRecommendation(improvementScore, detectorTarget)
|
|
};
|
|
}
|
|
|
|
/**
 * CONTENT UTILITIES
 */
/**
|
|
* Nettoyer contenu adversarial généré
|
|
*/
|
|
function cleanAdversarialContent(content) {
|
|
if (!content || typeof content !== 'string') return content;
|
|
|
|
let cleaned = content;
|
|
|
|
// Supprimer préfixes de génération
|
|
cleaned = cleaned.replace(/^(voici\s+)?le\s+contenu\s+(réécrit|amélioré|modifié)[:\s]*/gi, '');
|
|
cleaned = cleaned.replace(/^(bon,?\s*)?(alors,?\s*)?(pour\s+)?(ce\s+contenu[,\s]*)?/gi, '');
|
|
|
|
// Nettoyer formatage
|
|
cleaned = cleaned.replace(/\*\*[^*]+\*\*/g, ''); // Gras markdown
|
|
cleaned = cleaned.replace(/\s{2,}/g, ' '); // Espaces multiples
|
|
cleaned = cleaned.replace(/([.!?])\s*([.!?])/g, '$1 '); // Double ponctuation
|
|
|
|
// Nettoyer début/fin
|
|
cleaned = cleaned.trim();
|
|
cleaned = cleaned.replace(/^[,.\s]+/, '');
|
|
cleaned = cleaned.replace(/[,\s]+$/, '');
|
|
|
|
return cleaned;
|
|
}
|
|
|
|
/**
|
|
* Valider qualité du contenu adversarial
|
|
*/
|
|
function validateAdversarialContent(content, originalContent, minLength = 10, maxModificationRate = 90) {
|
|
const validation = {
|
|
isValid: true,
|
|
issues: [],
|
|
suggestions: []
|
|
};
|
|
|
|
// Vérifier longueur minimale
|
|
if (!content || content.length < minLength) {
|
|
validation.isValid = false;
|
|
validation.issues.push('Contenu trop court');
|
|
validation.suggestions.push('Augmenter la longueur du contenu généré');
|
|
}
|
|
|
|
// Vérifier cohérence
|
|
if (originalContent) {
|
|
const modificationRate = compareContentModification(originalContent, content);
|
|
|
|
if (modificationRate > maxModificationRate) {
|
|
validation.issues.push('Modification trop importante');
|
|
validation.suggestions.push('Réduire l\'intensité adversariale pour préserver le sens');
|
|
}
|
|
|
|
if (modificationRate < 5) {
|
|
validation.issues.push('Modification insuffisante');
|
|
validation.suggestions.push('Augmenter l\'intensité adversariale');
|
|
}
|
|
}
|
|
|
|
// Vérifier empreintes IA résiduelles
|
|
const fingerprints = detectAIFingerprints(content);
|
|
if (fingerprints.totalScore > 15) {
|
|
validation.issues.push('Empreintes IA encore présentes');
|
|
validation.suggestions.push('Appliquer post-processing anti-fingerprints');
|
|
}
|
|
|
|
return validation;
|
|
}
|
|
|
|
/**
 * TECHNICAL UTILITIES
 */
/**
|
|
* Chunk array avec préservation des paires
|
|
*/
|
|
function chunkArraySmart(array, size, preservePairs = false) {
|
|
if (!preservePairs) {
|
|
return chunkArray(array, size);
|
|
}
|
|
|
|
const chunks = [];
|
|
for (let i = 0; i < array.length; i += size) {
|
|
let chunk = array.slice(i, i + size);
|
|
|
|
// Si on coupe au milieu d'une paire (nombre impair), ajuster
|
|
if (chunk.length % 2 !== 0 && i + size < array.length) {
|
|
chunk = array.slice(i, i + size - 1);
|
|
}
|
|
|
|
chunks.push(chunk);
|
|
}
|
|
|
|
return chunks;
|
|
}
|
|
|
|
/**
|
|
* Chunk array standard
|
|
*/
|
|
function chunkArray(array, size) {
|
|
const chunks = [];
|
|
for (let i = 0; i < array.length; i += size) {
|
|
chunks.push(array.slice(i, i + size));
|
|
}
|
|
return chunks;
|
|
}
|
|
|
|
/**
|
|
* Sleep avec variation
|
|
*/
|
|
function sleep(ms, variation = 0.2) {
|
|
const actualMs = ms + (Math.random() - 0.5) * ms * variation;
|
|
return new Promise(resolve => setTimeout(resolve, Math.max(100, actualMs)));
|
|
}
|
|
|
|
/**
 * RECOMMENDATIONS
 */
/**
|
|
* Obtenir recommandation d'amélioration
|
|
*/
|
|
function getImprovementRecommendation(score, detectorTarget) {
|
|
const recommendations = {
|
|
general: {
|
|
good: "Bon niveau d'amélioration générale",
|
|
medium: "Appliquer techniques de variation syntaxique",
|
|
poor: "Nécessite post-processing intensif"
|
|
},
|
|
gptZero: {
|
|
good: "Imprévisibilité suffisante contre GPTZero",
|
|
medium: "Ajouter plus de ruptures narratives",
|
|
poor: "Intensifier variation syntaxique et lexicale"
|
|
},
|
|
originality: {
|
|
good: "Créativité suffisante contre Originality",
|
|
medium: "Enrichir diversité sémantique",
|
|
poor: "Réinventer présentation des informations"
|
|
}
|
|
};
|
|
|
|
const category = score > 10 ? 'good' : score > 5 ? 'medium' : 'poor';
|
|
return recommendations[detectorTarget]?.[category] || recommendations.general[category];
|
|
}
|
|
|
|
/**
 * METRICS AND STATS
 */
/**
|
|
* Calculer score composite anti-détection
|
|
*/
|
|
function calculateAntiDetectionScore(content, detectorTarget = 'general') {
|
|
const diversity = analyzeLexicalDiversity(content);
|
|
const variation = analyzeSentenceVariation(content);
|
|
const fingerprints = detectAIFingerprints(content);
|
|
const uniformity = analyzeStructuralUniformity(content);
|
|
|
|
const baseScore = (diversity * 0.3 + variation * 0.3 + (100 - fingerprints.totalScore) * 0.2 + (100 - uniformity) * 0.2);
|
|
|
|
// Ajustements selon détecteur
|
|
let adjustedScore = baseScore;
|
|
switch (detectorTarget) {
|
|
case 'gptZero':
|
|
adjustedScore = baseScore * (variation / 100) * 1.2; // Favorise variation
|
|
break;
|
|
case 'originality':
|
|
adjustedScore = baseScore * (diversity / 100) * 1.2; // Favorise diversité
|
|
break;
|
|
}
|
|
|
|
return Math.min(100, Math.max(0, Math.round(adjustedScore)));
|
|
}
|
|
|
|
module.exports = {
|
|
// Analyseurs
|
|
analyzeLexicalDiversity,
|
|
analyzeSentenceVariation,
|
|
detectAIFingerprints,
|
|
analyzeStructuralUniformity,
|
|
|
|
// Comparateurs
|
|
compareContentModification,
|
|
evaluateAdversarialImprovement,
|
|
|
|
// Utilitaires contenu
|
|
cleanAdversarialContent,
|
|
validateAdversarialContent,
|
|
|
|
// Utilitaires techniques
|
|
chunkArray,
|
|
chunkArraySmart,
|
|
sleep,
|
|
|
|
// Métriques
|
|
calculateAntiDetectionScore,
|
|
getImprovementRecommendation
|
|
}; |