// Real consistency test: every case is executed several times in a row so the
// actual run-to-run variance of the AI scoring can be observed.
import { default as IAEngine } from './src/DRS/services/IAEngine.js';

// Number of times each test case is executed.
const ITERATIONS = 10;
// Pause after constructing the engine.
// NOTE(review): presumably lets the engine finish async initialisation — confirm.
const ENGINE_WARMUP_MS = 1000;
// Pause after every request, to stay clear of provider rate limits.
const REQUEST_DELAY_MS = 3000;
// Minimum percentage of in-range scores for a case to be called consistent.
const CONSISTENCY_THRESHOLD = 80;
// Global verdict thresholds (percentage of consistent cases).
const RELIABLE_THRESHOLD = 90;
const ACCEPTABLE_THRESHOLD = 70;

/**
 * Resolves after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Tells whether a per-case stats object reaches the consistency threshold.
 * A consistency of 'N/A' parses to NaN and is therefore reported as inconsistent.
 * @param {{consistency: string}} stats - Stats produced by summarizeScores.
 * @returns {boolean}
 */
const isConsistent = (stats) =>
  Number.parseFloat(stats.consistency) >= CONSISTENCY_THRESHOLD;

/**
 * Builds the statistics object for one test case.
 *
 * Only finite numeric scores count as valid; error markers ('ERROR'), missing
 * scores and NaN are ignored so they cannot poison the aggregates.
 *
 * @param {Array<number|string>} scores - Raw scores, 'ERROR' for failed rounds.
 * @param {Array<string>} providers - Provider reported for each round.
 * @param {[number, number]} expectedRange - Inclusive [min, max] expected score.
 * @returns {object} Stats with keys scores, providers, validCount, average, min,
 *   max, variance (max - min spread), inRangeCount and consistency (percentage
 *   string with one decimal, or 'N/A' when no round produced a valid score).
 */
function summarizeScores(scores, providers, expectedRange) {
  const [expectedMin, expectedMax] = expectedRange;
  const validScores = scores.filter((score) => Number.isFinite(score));
  const validCount = validScores.length;
  const inRangeCount = validScores.filter(
    (score) => score >= expectedMin && score <= expectedMax
  ).length;

  if (validCount === 0) {
    return {
      scores,
      providers,
      validCount,
      average: 'N/A',
      min: 'N/A',
      max: 'N/A',
      variance: 'N/A',
      inRangeCount,
      consistency: 'N/A',
    };
  }

  const lowest = Math.min(...validScores);
  const highest = Math.max(...validScores);
  const total = validScores.reduce((sum, score) => sum + score, 0);

  return {
    scores,
    providers,
    validCount,
    average: Math.round(total / validCount),
    min: lowest,
    max: highest,
    variance: highest - lowest,
    inRangeCount,
    consistency: ((inRangeCount / validCount) * 100).toFixed(1),
  };
}

/**
 * Runs each test case ITERATIONS times against the IA engine, logs per-case
 * and global consistency reports, and returns the collected statistics.
 *
 * @returns {Promise<Object<string, object>>} Stats per test case, keyed by the
 *   test case name (see summarizeScores for the shape of each entry).
 */
async function testRealConsistency() {
  console.log(`🔄 TEST DE VRAIE CONSISTANCE - ${ITERATIONS} itérations par cas\n`);
  console.log('================================================\n');

  const engine = new IAEngine({
    defaultProvider: 'openai',
    fallbackProviders: ['deepseek']
  });

  await sleep(ENGINE_WARMUP_MS);

  // The four scenarios; each `test` thunk builds a fresh options object
  // (including a new timestamp) every time it is invoked.
  const testCases = [
    {
      name: 'WRONG: Science -> Nonsense',
      test: () => engine.validateComprehension(
        'Albert Einstein developed the theory of relativity in the early 20th century.',
        'Dancing unicorns eat rainbow cookies in space',
        { exerciseType: 'physics-comprehension', timestamp: Date.now() }
      ),
      expectedRange: [0, 30],
      type: 'WRONG'
    },
    {
      name: 'CORRECT: History understanding',
      test: () => engine.validateComprehension(
        'World War II ended in 1945 when Japan surrendered after atomic bombs.',
        'World War 2 finished in 1945 when Japan gave up after nuclear attacks',
        { exerciseType: 'history-analysis', timestamp: Date.now() }
      ),
      expectedRange: [70, 100],
      type: 'CORRECT'
    },
    {
      name: 'WRONG: French translation nonsense',
      test: () => engine.validateTranslation(
        'Where is the library?',
        'Elephant potato singing moon',
        { fromLang: 'en', toLang: 'fr', context: 'directions', timestamp: Date.now() }
      ),
      expectedRange: [0, 30],
      type: 'WRONG'
    },
    {
      name: 'CORRECT: Spanish translation',
      test: () => engine.validateTranslation(
        'What time is it?',
        '¿Qué hora es?',
        { fromLang: 'en', toLang: 'es', context: 'time', timestamp: Date.now() }
      ),
      expectedRange: [70, 100],
      type: 'CORRECT'
    }
  ];

  const iterations = ITERATIONS;
  const allResults = {};

  for (const testCase of testCases) {
    const [expectedMin, expectedMax] = testCase.expectedRange;

    console.log(`🧪 ${testCase.name} - Testing ${iterations} times`);
    console.log(` Expected: ${expectedMin}-${expectedMax} points\n`);

    const scores = [];
    const providers = [];

    for (let i = 1; i <= iterations; i++) {
      try {
        console.log(` Round ${i}/${iterations}...`);

        const result = await testCase.test();
        scores.push(result.score);
        providers.push(result.provider);

        const inRange = result.score >= expectedMin && result.score <= expectedMax;
        console.log(` Score: ${result.score} ${inRange ? '✅' : '❌'} (${result.provider})`);
      } catch (error) {
        // A failed round is recorded as a marker and excluded from the stats.
        console.log(` ❌ Error: ${error.message}`);
        scores.push('ERROR');
        providers.push('ERROR');
      }

      // Space the requests out to avoid rate limiting.
      await sleep(REQUEST_DELAY_MS);
    }

    // Per-case analysis.
    const stats = summarizeScores(scores, providers, testCase.expectedRange);
    allResults[testCase.name] = stats;

    console.log(`\n 📊 RÉSULTATS pour "${testCase.name}":`);
    console.log(` Scores: [${scores.join(', ')}]`);
    console.log(` Moyenne: ${stats.average}`);
    console.log(` Min-Max: ${stats.min}-${stats.max} (variance: ${stats.variance})`);
    console.log(` Dans la plage: ${stats.inRangeCount}/${stats.validCount} (${stats.consistency}%)`);
    console.log(` Consistance: ${isConsistent(stats) ? '✅ BONNE' : '❌ PROBLÉMATIQUE'}\n`);
    console.log(' ─────────────────────────────────────────────────────\n');
  }

  // Final global analysis.
  console.log('🎯 ANALYSE FINALE DE CONSISTANCE:');
  console.log('==================================\n');

  const resultEntries = Object.entries(allResults);
  const totalCases = resultEntries.length;
  let totalConsistentCases = 0;

  for (const [name, stats] of resultEntries) {
    const consistent = isConsistent(stats);
    if (consistent) totalConsistentCases++;

    const status = consistent ? '✅' : '❌';
    console.log(`${status} ${name}:`);
    console.log(` Consistance: ${stats.consistency}% (${stats.inRangeCount}/${stats.validCount})`);
    console.log(` Variance: ${stats.variance} points`);
    console.log(` Moyenne: ${stats.average}\n`);
  }

  // Kept numeric so the threshold checks below do not rely on string coercion;
  // formatted only for display. Guarded against an empty case list.
  const globalConsistency = totalCases > 0
    ? (totalConsistentCases / totalCases) * 100
    : 0;

  console.log(`🎯 CONSISTANCE GLOBALE: ${globalConsistency.toFixed(1)}%`);
  console.log(` Cas consistants: ${totalConsistentCases}/${totalCases}`);

  if (globalConsistency >= RELIABLE_THRESHOLD) {
    console.log('\n🎉 SYSTÈME TRÈS FIABLE!');
    console.log('✅ Scoring IA consistant et prévisible');
  } else if (globalConsistency >= ACCEPTABLE_THRESHOLD) {
    console.log('\n✅ SYSTÈME ACCEPTABLE');
    console.log('⚠️ Quelques variations mais utilisable');
  } else {
    console.log('\n❌ SYSTÈME PROBLÉMATIQUE');
    console.log('⚠️ Trop de variations, scoring imprévisible');
  }

  // Details of the problematic cases.
  const problematicCases = resultEntries.filter(([, stats]) => !isConsistent(stats));

  if (problematicCases.length > 0) {
    console.log('\n🔍 CAS PROBLÉMATIQUES:');
    for (const [name, stats] of problematicCases) {
      console.log(` ❌ ${name}: ${stats.consistency}% de consistance`);
      console.log(` Scores: [${stats.scores.slice(0, 5).join(', ')}...]`);
    }
  }

  return allResults;
}

testRealConsistency().catch(console.error);