// Class_generator/tests/ai-validation/test-strict-scoring.js

// Test strict scoring for wrong answers
import { default as IAEngine } from './src/DRS/services/IAEngine.js';

/**
 * Smoke-tests that the AI validation engine scores clearly wrong answers strictly.
 *
 * Sends two deliberately wrong answers (one comprehension, one translation) to the
 * engine, logs what comes back, and checks that each score is strictly below
 * `maxScore`. A case counts as failed when the score is at or above the threshold,
 * when the response carries no finite numeric score, or when the engine call
 * throws. Failures are logged and counted rather than thrown, so every case always
 * runs; when at least one case fails, `process.exitCode` is set to 1 so the script
 * can be used as a gate.
 *
 * @param {object} [options]
 * @param {object} [options.engine] - Object exposing `validateComprehension` and
 *     `validateTranslation`. When omitted, a new `IAEngine` is created with
 *     `openai` as default provider and `deepseek` as fallback.
 * @param {number} [options.maxScore=20] - Exclusive upper bound for a passing score.
 * @param {number} [options.initDelayMs=1000] - Wait before the first request.
 * @param {number} [options.rateLimitDelayMs=3000] - Pause between consecutive cases.
 * @returns {Promise<{passed: number, failed: number, results: object[]}>} Counts plus
 *     one entry per case: `{ type, description, score, passed, error }`.
 */
async function testStrictScoring({
    engine,
    maxScore = 20,
    initDelayMs = 1000,
    rateLimitDelayMs = 3000
} = {}) {
    console.log('🎯 STRICT SCORING TEST - Wrong answers should get low scores\n');

    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // Build the real engine only when none was injected, so the scoring checks can
    // be exercised against a stub without touching any provider.
    const activeEngine = engine ?? new IAEngine({
        defaultProvider: 'openai',
        fallbackProviders: ['deepseek']
    });

    // NOTE(review): presumably gives the engine time to finish initialising — confirm.
    if (initDelayMs > 0) {
        await sleep(initDelayMs);
    }

    // Test cases that should get LOW scores
    const wrongAnswerTests = [
        {
            type: 'comprehension',
            test: () => activeEngine.validateComprehension(
                'The Amazon rainforest is the largest tropical rainforest in the world.',
                'Elephants are purple animals',
                { exerciseType: 'text' }
            ),
            description: 'Comprehension: "Elephants are purple" for Amazon rainforest'
        },
        {
            type: 'translation',
            test: () => activeEngine.validateTranslation(
                'Good morning',
                'Pizza spaghetti',
                { fromLang: 'en', toLang: 'fr' }
            ),
            description: 'Translation: "Pizza spaghetti" for "Good morning"'
        }
    ];

    const results = [];

    for (const [index, testCase] of wrongAnswerTests.entries()) {
        const outcome = {
            type: testCase.type,
            description: testCase.description,
            score: null,
            passed: false,
            error: null
        };

        try {
            console.log(`\n🧪 Testing: ${testCase.description}`);
            const result = await testCase.test();

            // Only a finite number can be compared meaningfully: `null < 20` is
            // true in JavaScript, so a missing score must not be allowed to pass.
            const score = result?.score;
            const hasScore = typeof score === 'number' && Number.isFinite(score);
            outcome.score = hasScore ? score : null;
            outcome.passed = hasScore && score < maxScore;

            // Guard the preview so a missing or non-string feedback field cannot
            // throw and be misreported as an engine failure.
            const feedback = typeof result?.feedback === 'string' ? result.feedback : '';
            const feedbackPreview = feedback.length > 100
                ? `${feedback.substring(0, 100)}...`
                : (feedback || '(none)');

            console.log(`📊 Score: ${score}`);
            console.log(`✅ Should be <${maxScore}: ${outcome.passed ? 'PASS' : 'FAIL'}`);
            console.log(`🤖 Provider: ${result?.provider}`);
            console.log(`💬 Feedback: ${feedbackPreview}`);

            if (!hasScore) {
                console.log('⚠️ NO NUMERIC SCORE - The engine response could not be evaluated');
            } else if (!outcome.passed) {
                console.log(`⚠️ SCORING TOO LENIENT - This should be <${maxScore} points!`);
            }
        } catch (error) {
            // Keep going so one provider error does not hide the other case.
            outcome.error = error;
            console.log(`❌ Test failed: ${error?.message ?? error}`);
        }

        results.push(outcome);

        // Rate limiting is only needed between requests, not after the last one.
        if (index < wrongAnswerTests.length - 1 && rateLimitDelayMs > 0) {
            await sleep(rateLimitDelayMs);
        }
    }

    const failed = results.filter((outcome) => !outcome.passed).length;
    const passed = results.length - failed;

    console.log('\n🎯 STRICT SCORING SUMMARY:');
    console.log('- Completely wrong answers should score 0-20 points');
    console.log('- Current prompts include explicit examples of wrong answers');
    console.log('- System prompts emphasize being "strict but fair"');
    console.log(`- Result: ${passed} passed, ${failed} failed`);

    // Signal failure through the exit code without aborting pending output.
    if (failed > 0 && typeof process !== 'undefined') {
        process.exitCode = 1;
    }

    return { passed, failed, results };
}

// Entry point: run the scenario. Anything that rejects out of the run itself is
// logged and turned into a non-zero exit code so the failure is visible to
// whatever invoked the script.
testStrictScoring().catch((error) => {
    console.error(error);
    if (typeof process !== 'undefined') {
        process.exitCode = 1;
    }
});