// Class_generator/tests/ai-validation/test-real-consistency.js

// Test de VRAIE consistance - 10 fois chaque cas pour voir la variance réelle
import { default as IAEngine } from './src/DRS/services/IAEngine.js';

/** Minimum share (in %) of in-range scores for a test case to be reported as consistent. */
const CONSISTENCY_THRESHOLD_PERCENT = 80;

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Tells whether a stats record reaches the consistency threshold.
 *
 * `stats.consistency` is the string 'N/A' when no round produced a usable
 * score; that parses to NaN, so such a case is reported as NOT consistent.
 *
 * @param {{consistency: string}} stats - Record produced by `summarizeScores`.
 * @returns {boolean}
 */
function isConsistent(stats) {
    return Number.parseFloat(stats.consistency) >= CONSISTENCY_THRESHOLD_PERCENT;
}

/**
 * Builds the four scenarios exercised by the consistency run.
 *
 * Each scenario calls the engine lazily through `test()` and passes a fresh
 * `Date.now()` timestamp in its context object on every call.
 *
 * @param {object} engine - Object exposing `validateComprehension` and `validateTranslation`.
 * @returns {Array<{name: string, test: Function, expectedRange: number[], type: string}>}
 */
function buildTestCases(engine) {
    return [
        {
            name: 'WRONG: Science -> Nonsense',
            test: () => engine.validateComprehension(
                'Albert Einstein developed the theory of relativity in the early 20th century.',
                'Dancing unicorns eat rainbow cookies in space',
                { exerciseType: 'physics-comprehension', timestamp: Date.now() }
            ),
            expectedRange: [0, 30],
            type: 'WRONG'
        },
        {
            name: 'CORRECT: History understanding',
            test: () => engine.validateComprehension(
                'World War II ended in 1945 when Japan surrendered after atomic bombs.',
                'World War 2 finished in 1945 when Japan gave up after nuclear attacks',
                { exerciseType: 'history-analysis', timestamp: Date.now() }
            ),
            expectedRange: [70, 100],
            type: 'CORRECT'
        },
        {
            name: 'WRONG: French translation nonsense',
            test: () => engine.validateTranslation(
                'Where is the library?',
                'Elephant potato singing moon',
                { fromLang: 'en', toLang: 'fr', context: 'directions', timestamp: Date.now() }
            ),
            expectedRange: [0, 30],
            type: 'WRONG'
        },
        {
            name: 'CORRECT: Spanish translation',
            test: () => engine.validateTranslation(
                'What time is it?',
                '¿Qué hora es?',
                { fromLang: 'en', toLang: 'es', context: 'time', timestamp: Date.now() }
            ),
            expectedRange: [70, 100],
            type: 'CORRECT'
        }
    ];
}

/**
 * Aggregates the per-round outcomes of one test case.
 *
 * Only finite numeric scores take part in the statistics; the 'ERROR'
 * placeholders (and NaN) are kept in `scores` for display but ignored here.
 * Note that `variance` is the spread `max - min`, not a statistical variance;
 * the key name is kept so existing consumers of the result keep working.
 *
 * @param {Array<number|string>} scores - One entry per round (number or 'ERROR').
 * @param {string[]} providers - Provider reported for each round (or 'ERROR').
 * @param {number[]} expectedRange - Inclusive `[min, max]` bounds for a passing score.
 * @returns {object} Stats with keys scores, providers, validCount, average,
 *   min, max, variance, inRangeCount and consistency (percentage string with
 *   one decimal, or 'N/A' when there is no valid score).
 */
function summarizeScores(scores, providers, [expectedMin, expectedMax]) {
    const validScores = scores.filter(score => Number.isFinite(score));
    const validCount = validScores.length;
    const inRangeCount = validScores
        .filter(score => score >= expectedMin && score <= expectedMax)
        .length;

    if (validCount === 0) {
        return {
            scores,
            providers,
            validCount,
            average: 'N/A',
            min: 'N/A',
            max: 'N/A',
            variance: 'N/A',
            inRangeCount,
            consistency: 'N/A'
        };
    }

    const lowest = Math.min(...validScores);
    const highest = Math.max(...validScores);
    const total = validScores.reduce((sum, score) => sum + score, 0);

    return {
        scores,
        providers,
        validCount,
        average: Math.round(total / validCount),
        min: lowest,
        max: highest,
        variance: highest - lowest,
        inRangeCount,
        consistency: (inRangeCount / validCount * 100).toFixed(1)
    };
}

/**
 * Runs every scenario `iterations` times against the AI engine, prints a
 * per-case and a global consistency report, and returns the collected stats.
 *
 * A round that throws is recorded exactly once as 'ERROR' (score and
 * provider) and does not abort the run.
 *
 * @param {object} [options]
 * @param {number} [options.iterations=10] - Rounds per test case.
 * @param {number} [options.warmupDelayMs=1000] - Pause before the first request.
 * @param {number} [options.roundDelayMs=3000] - Pause after each round (rate limiting).
 * @param {object} [options.engine] - Engine to test; defaults to a new
 *   `IAEngine` using 'openai' with 'deepseek' as fallback.
 * @returns {Promise<Object<string, object>>} Stats keyed by test case name
 *   (see `summarizeScores` for the record layout).
 */
async function testRealConsistency({
    iterations = 10,
    warmupDelayMs = 1000,
    roundDelayMs = 3000,
    engine = new IAEngine({
        defaultProvider: 'openai',
        fallbackProviders: ['deepseek']
    })
} = {}) {
    console.log(`🔄 TEST DE VRAIE CONSISTANCE - ${iterations} itérations par cas\n`);
    console.log('================================================\n');

    await sleep(warmupDelayMs);

    const allResults = {};

    for (const testCase of buildTestCases(engine)) {
        const [expectedMin, expectedMax] = testCase.expectedRange;

        console.log(`🧪 ${testCase.name} - Testing ${iterations} times`);
        console.log(`   Expected: ${expectedMin}-${expectedMax} points\n`);

        const scores = [];
        const providers = [];

        for (let round = 1; round <= iterations; round++) {
            try {
                console.log(`   Round ${round}/${iterations}...`);

                const result = await testCase.test();
                // Read everything that may throw BEFORE recording anything, so a
                // failing round can never leave a half-written entry behind.
                const { score, provider } = result;
                scores.push(score);
                providers.push(provider);

                const inRange = score >= expectedMin && score <= expectedMax;
                console.log(`     Score: ${score} ${inRange ? '✅' : '❌'} (${provider})`);
            } catch (error) {
                console.log(`     ❌ Error: ${error?.message ?? error}`);
                scores.push('ERROR');
                providers.push('ERROR');
            }

            // Pause to stay under provider rate limits and force fresh requests.
            await sleep(roundDelayMs);
        }

        const stats = summarizeScores(scores, providers, testCase.expectedRange);
        allResults[testCase.name] = stats;

        console.log(`\n   📊 RÉSULTATS pour "${testCase.name}":`);
        console.log(`   Scores: [${scores.join(', ')}]`);
        console.log(`   Moyenne: ${stats.average}`);
        console.log(`   Min-Max: ${stats.min}-${stats.max} (variance: ${stats.variance})`);
        console.log(`   Dans la plage: ${stats.inRangeCount}/${stats.validCount} (${stats.consistency}%)`);
        console.log(`   Consistance: ${isConsistent(stats) ? '✅ BONNE' : '❌ PROBLÉMATIQUE'}\n`);

        console.log('   ─────────────────────────────────────────────────────\n');
    }

    // Global report across all cases.
    console.log('🎯 ANALYSE FINALE DE CONSISTANCE:');
    console.log('==================================\n');

    const entries = Object.entries(allResults);
    const totalCases = entries.length;
    let totalConsistentCases = 0;

    for (const [name, stats] of entries) {
        const consistent = isConsistent(stats);
        if (consistent) totalConsistentCases++;

        console.log(`${consistent ? '✅' : '❌'} ${name}:`);
        console.log(`    Consistance: ${stats.consistency}% (${stats.inRangeCount}/${stats.validCount})`);
        console.log(`    Variance: ${stats.variance} points`);
        console.log(`    Moyenne: ${stats.average}\n`);
    }

    // Kept as a number for the threshold comparisons; formatted only for display.
    const globalConsistency = totalCases > 0 ? totalConsistentCases / totalCases * 100 : 0;

    console.log(`🎯 CONSISTANCE GLOBALE: ${globalConsistency.toFixed(1)}%`);
    console.log(`   Cas consistants: ${totalConsistentCases}/${totalCases}`);

    if (globalConsistency >= 90) {
        console.log('\n🎉 SYSTÈME TRÈS FIABLE!');
        console.log('✅ Scoring IA consistant et prévisible');
    } else if (globalConsistency >= 70) {
        console.log('\n✅ SYSTÈME ACCEPTABLE');
        console.log('⚠️ Quelques variations mais utilisable');
    } else {
        console.log('\n❌ SYSTÈME PROBLÉMATIQUE');
        console.log('⚠️ Trop de variations, scoring imprévisible');
    }

    // Problem details: uses the same predicate as the summary above so that
    // cases whose rounds all failed ('N/A') are listed too.
    const problematicCases = entries.filter(([, stats]) => !isConsistent(stats));
    if (problematicCases.length > 0) {
        console.log('\n🔍 CAS PROBLÉMATIQUES:');
        for (const [name, stats] of problematicCases) {
            console.log(`   ❌ ${name}: ${stats.consistency}% de consistance`);
            console.log(`      Scores: [${stats.scores.slice(0, 5).join(', ')}...]`);
        }
    }

    return allResults;
}

// Script entry point: run the consistency check. An unexpected failure is
// logged and, where a Node-style `process` object exists, reported through a
// non-zero exit code so that callers (CI, shell scripts) can detect it.
testRealConsistency().catch((error) => {
    console.error(error);
    if (typeof process !== 'undefined') {
        process.exitCode = 1;
    }
});