// Class_generator/tests/ai-validation/test-final-validation.js

/**
 * FINAL VALIDATION TEST - REAL CONTENT WITH BOTH PROVIDERS
 * Test everything that matters for production use
 */

async function finalValidationTest() {
    console.log('🎯 FINAL VALIDATION TEST - PRODUCTION SCENARIOS\n');
    console.log('===========================================\n');

    const { default: LLMValidator } = await import('./src/DRS/services/LLMValidator.js');
    const llmValidator = new LLMValidator();

    // Real educational content for testing
    const realScenarios = [
        {
            name: 'Text Comprehension - Good Answer',
            text: 'Climate change refers to long-term shifts in global temperatures and weather patterns. While climate change is a natural phenomenon, scientific evidence shows that human activities since the 1800s have been the main driver of climate change.',
            userAnswer: 'Climate change is caused by human activities since the 1800s and affects global temperatures and weather.',
            expectedScore: 'high',
            type: 'text'
        },
        {
            name: 'Text Comprehension - Poor Answer',
            text: 'Climate change refers to long-term shifts in global temperatures and weather patterns.',
            userAnswer: 'Cats are fluffy animals',
            expectedScore: 'low',
            type: 'text'
        },
        {
            name: 'Grammar - Correct',
            original: 'I am going to the store',
            userCorrection: 'I am going to the store',
            expectedScore: 'high',
            type: 'grammar'
        },
        {
            name: 'Grammar - Needs Work',
            original: 'I are going to store',
            userCorrection: 'I are going to store',
            expectedScore: 'low',
            type: 'grammar'
        },
        {
            name: 'Translation - Excellent',
            original: 'Good morning',
            translation: 'Bonjour',
            fromLang: 'en',
            toLang: 'fr',
            expectedScore: 'high',
            type: 'translation'
        }
    ];

    const results = {
        openai: { tests: [], errors: [] },
        deepseek: { tests: [], errors: [] },
        summary: { totalTests: 0, passedTests: 0 }
    };

    console.log('1️⃣ TESTING WITH OPENAI (default provider)\n');

    for (const scenario of realScenarios) {
        try {
            console.log(`📋 Testing: ${scenario.name}`);
            let result;

            if (scenario.type === 'text') {
                result = await llmValidator.validateTextComprehension(
                    scenario.text,
                    scenario.userAnswer,
                    { language: 'en', level: 'intermediate' }
                );
            } else if (scenario.type === 'grammar') {
                result = await llmValidator.validateGrammar(
                    scenario.original,
                    { userCorrection: scenario.userCorrection }
                );
            } else if (scenario.type === 'translation') {
                result = await llmValidator.validateTranslation(
                    scenario.original,
                    scenario.translation,
                    { fromLang: scenario.fromLang, toLang: scenario.toLang }
                );
            }

            const testResult = {
                scenario: scenario.name,
                provider: result.provider,
                score: result.score,
                expectedScore: scenario.expectedScore,
                scoreAppropriate: scenario.expectedScore === 'high' ? result.score > 70 : result.score < 50,
                hasFeedback: !!result.feedback,
                success: !!result.score
            };

            results.openai.tests.push(testResult);
            results.summary.totalTests++;
            if (testResult.success && testResult.scoreAppropriate) results.summary.passedTests++;

            console.log(`  ✅ Provider: ${result.provider}, Score: ${result.score}, Appropriate: ${testResult.scoreAppropriate}`);

            // Wait between calls to avoid rate limiting
            await new Promise(resolve => setTimeout(resolve, 1500));

        } catch (error) {
            console.log(`  ❌ Failed: ${error.message}`);
            results.openai.errors.push({ scenario: scenario.name, error: error.message });
        }
    }

    console.log('\n2️⃣ TESTING WITH DEEPSEEK (forced provider)\n');

    // Test with DeepSeek for comparison
    const { default: IAEngine } = await import('./src/DRS/services/IAEngine.js');
    const iaEngine = new IAEngine();

    const keyScenarios = realScenarios.slice(0, 2); // Test 2 scenarios with DeepSeek

    for (const scenario of keyScenarios) {
        try {
            console.log(`📋 Testing with DeepSeek: ${scenario.name}`);

            if (scenario.type === 'text') {
                // Direct test with IAEngine to force DeepSeek
                const result = await iaEngine.validateEducationalContent(
                    `Evaluate this text comprehension response. Text: "${scenario.text}" Student answer: "${scenario.userAnswer}" Rate from 0-100 and provide feedback.`,
                    {
                        preferredProvider: 'deepseek',
                        language: 'en',
                        exerciseType: 'text-analysis'
                    }
                );

                const testResult = {
                    scenario: scenario.name,
                    provider: result.provider,
                    hasContent: !!result.content,
                    success: result.provider === 'deepseek'
                };

                results.deepseek.tests.push(testResult);
                console.log(`  ✅ Provider: ${result.provider}, Has Content: ${testResult.hasContent}`);
            }

            await new Promise(resolve => setTimeout(resolve, 2000));

        } catch (error) {
            console.log(`  ❌ DeepSeek failed: ${error.message}`);
            results.deepseek.errors.push({ scenario: scenario.name, error: error.message });
        }
    }

    console.log('\n📊 FINAL VALIDATION RESULTS:');
    console.log('=============================');

    console.log('\n🤖 OpenAI Results:');
    console.log(`  Total tests: ${results.openai.tests.length}`);
    console.log(`  Successful: ${results.openai.tests.filter(t => t.success).length}`);
    console.log(`  Appropriate scoring: ${results.openai.tests.filter(t => t.scoreAppropriate).length}`);
    console.log(`  Errors: ${results.openai.errors.length}`);

    console.log('\n🤖 DeepSeek Results:');
    console.log(`  Total tests: ${results.deepseek.tests.length}`);
    console.log(`  Successful: ${results.deepseek.tests.filter(t => t.success).length}`);
    console.log(`  Errors: ${results.deepseek.errors.length}`);

    console.log('\n🎯 Overall Summary:');
    console.log(`  Total scenarios tested: ${results.summary.totalTests}`);
    console.log(`  Passed with appropriate scoring: ${results.summary.passedTests}`);
    console.log(`  Success rate: ${((results.summary.passedTests / results.summary.totalTests) * 100).toFixed(1)}%`);

    // Show specific results for debugging
    console.log('\n📋 Detailed Results:');
    results.openai.tests.forEach(test => {
        const status = test.success && test.scoreAppropriate ? '✅' : '❌';
        console.log(`  ${status} ${test.scenario}: Score ${test.score} (expected ${test.expectedScore})`);
    });

    const allSystemsWorking = results.summary.passedTests > results.summary.totalTests * 0.7 &&
                             results.deepseek.tests.some(t => t.success);

    console.log('\n🚀 SYSTEM STATUS:');
    console.log(`  AI Integration: ${allSystemsWorking ? 'FULLY OPERATIONAL' : 'NEEDS ATTENTION'}`);
    console.log(`  OpenAI: ${results.openai.tests.length > 0 ? 'WORKING' : 'FAILED'}`);
    console.log(`  DeepSeek: ${results.deepseek.tests.some(t => t.success) ? 'WORKING' : 'FAILED'}`);
    console.log(`  Fallback System: ${results.openai.tests.length > 0 && results.deepseek.tests.length > 0 ? 'CONFIGURED' : 'NOT TESTED'}`);

    return results;
}

// Execute final validation
finalValidationTest().catch(error => {
    console.error('❌ Final validation failed:', error);
    process.exit(1);
});