/**
 * FINAL VALIDATION TEST - REAL CONTENT WITH BOTH PROVIDERS
 * Test everything that matters for production use
 *
 * NOTE(review): the leading glyphs in several log strings look like
 * mis-encoded emoji. They are kept exactly as found because the intended
 * characters cannot be confirmed from this file alone.
 */

/**
 * Resolve after `ms` milliseconds. Used to space out provider calls.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function pause(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Tell whether a validator returned a usable numeric score.
 *
 * A score of 0 is a legitimate result (e.g. for a completely wrong answer),
 * so truthiness must not be used here.
 *
 * @param {*} score - The `score` field of a validator result.
 * @returns {boolean} True when `score` is a finite number or numeric string.
 */
function hasUsableScore(score) {
  if (score === null || score === undefined || score === '') {
    return false;
  }
  return Number.isFinite(Number(score));
}

/**
 * Dispatch one scenario to the matching LLMValidator method.
 *
 * @param {object} llmValidator - LLMValidator instance.
 * @param {object} scenario - Scenario definition; `type` selects the method.
 * @returns {Promise<object>} Whatever the validator method resolves to.
 * @throws {Error} When `scenario.type` is not 'text', 'grammar' or 'translation'.
 */
async function runValidatorScenario(llmValidator, scenario) {
  switch (scenario.type) {
    case 'text':
      return llmValidator.validateTextComprehension(
        scenario.text,
        scenario.userAnswer,
        { language: 'en', level: 'intermediate' }
      );
    case 'grammar':
      return llmValidator.validateGrammar(
        scenario.original,
        { userCorrection: scenario.userCorrection }
      );
    case 'translation':
      return llmValidator.validateTranslation(
        scenario.original,
        scenario.translation,
        { fromLang: scenario.fromLang, toLang: scenario.toLang }
      );
    default:
      // Fail loudly instead of letting `result.provider` throw a TypeError later.
      throw new Error(`Unknown scenario type: ${scenario.type}`);
  }
}

/**
 * Build the per-scenario record stored in `results.openai.tests`.
 *
 * @param {object} scenario - Scenario definition (uses `name`, `expectedScore`).
 * @param {object} result - Validator result; `provider`, `score` and `feedback` are read.
 * @returns {object} Record with scenario, provider, score, expectedScore,
 *   scoreAppropriate, hasFeedback and success.
 */
function buildValidatorTestResult(scenario, result) {
  const { score } = result;
  // 'high' expects a score above 70; anything else expects a score below 50.
  const scoreAppropriate = scenario.expectedScore === 'high' ? score > 70 : score < 50;

  return {
    scenario: scenario.name,
    provider: result.provider,
    score,
    expectedScore: scenario.expectedScore,
    scoreAppropriate,
    hasFeedback: !!result.feedback,
    success: hasUsableScore(score)
  };
}

/**
 * Print the final report for both providers and the overall system status.
 *
 * @param {object} results - Aggregated results built by `finalValidationTest`.
 * @returns {void}
 */
function printValidationReport(results) {
  const { openai, deepseek, summary } = results;

  console.log('\nšŸ“Š FINAL VALIDATION RESULTS:');
  console.log('=============================');

  console.log('\nšŸ¤– OpenAI Results:');
  console.log(` Total tests: ${openai.tests.length}`);
  console.log(` Successful: ${openai.tests.filter(t => t.success).length}`);
  console.log(` Appropriate scoring: ${openai.tests.filter(t => t.scoreAppropriate).length}`);
  console.log(` Errors: ${openai.errors.length}`);

  console.log('\nšŸ¤– DeepSeek Results:');
  console.log(` Total tests: ${deepseek.tests.length}`);
  console.log(` Successful: ${deepseek.tests.filter(t => t.success).length}`);
  console.log(` Errors: ${deepseek.errors.length}`);

  // Guard the division so an empty run reports 0.0% rather than NaN%.
  const successRate = summary.totalTests > 0
    ? (summary.passedTests / summary.totalTests) * 100
    : 0;

  console.log('\nšŸŽÆ Overall Summary:');
  console.log(` Total scenarios tested: ${summary.totalTests}`);
  console.log(` Passed with appropriate scoring: ${summary.passedTests}`);
  console.log(` Success rate: ${successRate.toFixed(1)}%`);

  // Show specific results for debugging
  console.log('\nšŸ“‹ Detailed Results:');
  for (const test of openai.tests) {
    const status = test.success && test.scoreAppropriate ? 'āœ…' : 'āŒ';
    console.log(` ${status} ${test.scenario}: Score ${test.score} (expected ${test.expectedScore})`);
  }

  const deepseekWorking = deepseek.tests.some(t => t.success);
  const allSystemsWorking =
    summary.passedTests > summary.totalTests * 0.7 && deepseekWorking;

  console.log('\nšŸš€ SYSTEM STATUS:');
  console.log(` AI Integration: ${allSystemsWorking ? 'FULLY OPERATIONAL' : 'NEEDS ATTENTION'}`);
  console.log(` OpenAI: ${openai.tests.length > 0 ? 'WORKING' : 'FAILED'}`);
  console.log(` DeepSeek: ${deepseekWorking ? 'WORKING' : 'FAILED'}`);
  console.log(` Fallback System: ${openai.tests.length > 0 && deepseek.tests.length > 0 ? 'CONFIGURED' : 'NOT TESTED'}`);
}

/**
 * Run every scenario through LLMValidator (default provider), then re-run the
 * first two scenarios through IAEngine with DeepSeek requested explicitly,
 * and print a summary report.
 *
 * Each scenario failure is logged and recorded; it does not abort the run.
 * Every attempted scenario counts toward `summary.totalTests`, including the
 * ones whose call threw, so the success rate reflects failures too.
 *
 * @returns {Promise<object>} `{ openai: { tests, errors }, deepseek: { tests, errors },
 *   summary: { totalTests, passedTests } }`.
 * @throws Rejects if either service module cannot be imported or constructed.
 */
async function finalValidationTest() {
  const OPENAI_CALL_DELAY_MS = 1500;
  const DEEPSEEK_CALL_DELAY_MS = 2000;

  console.log('šŸŽÆ FINAL VALIDATION TEST - PRODUCTION SCENARIOS\n');
  console.log('===========================================\n');

  const { default: LLMValidator } = await import('./src/DRS/services/LLMValidator.js');
  const llmValidator = new LLMValidator();

  // Real educational content for testing
  const realScenarios = [
    {
      name: 'Text Comprehension - Good Answer',
      text: 'Climate change refers to long-term shifts in global temperatures and weather patterns. While climate change is a natural phenomenon, scientific evidence shows that human activities since the 1800s have been the main driver of climate change.',
      userAnswer: 'Climate change is caused by human activities since the 1800s and affects global temperatures and weather.',
      expectedScore: 'high',
      type: 'text'
    },
    {
      name: 'Text Comprehension - Poor Answer',
      text: 'Climate change refers to long-term shifts in global temperatures and weather patterns.',
      userAnswer: 'Cats are fluffy animals',
      expectedScore: 'low',
      type: 'text'
    },
    {
      name: 'Grammar - Correct',
      original: 'I am going to the store',
      userCorrection: 'I am going to the store',
      expectedScore: 'high',
      type: 'grammar'
    },
    {
      name: 'Grammar - Needs Work',
      original: 'I are going to store',
      userCorrection: 'I are going to store',
      expectedScore: 'low',
      type: 'grammar'
    },
    {
      name: 'Translation - Excellent',
      original: 'Good morning',
      translation: 'Bonjour',
      fromLang: 'en',
      toLang: 'fr',
      expectedScore: 'high',
      type: 'translation'
    }
  ];

  const results = {
    openai: { tests: [], errors: [] },
    deepseek: { tests: [], errors: [] },
    summary: { totalTests: 0, passedTests: 0 }
  };

  console.log('1ļøāƒ£ TESTING WITH OPENAI (default provider)\n');

  for (const scenario of realScenarios) {
    // Count the attempt up front so a thrown call still lowers the success rate.
    results.summary.totalTests++;

    try {
      console.log(`šŸ“‹ Testing: ${scenario.name}`);

      const result = await runValidatorScenario(llmValidator, scenario);
      const testResult = buildValidatorTestResult(scenario, result);

      results.openai.tests.push(testResult);
      if (testResult.success && testResult.scoreAppropriate) {
        results.summary.passedTests++;
      }

      console.log(` āœ… Provider: ${result.provider}, Score: ${result.score}, Appropriate: ${testResult.scoreAppropriate}`);
    } catch (error) {
      console.log(` āŒ Failed: ${error.message}`);
      results.openai.errors.push({ scenario: scenario.name, error: error.message });
    } finally {
      // Wait between calls to avoid rate limiting, after failures as well.
      await pause(OPENAI_CALL_DELAY_MS);
    }
  }

  console.log('\n2ļøāƒ£ TESTING WITH DEEPSEEK (forced provider)\n');

  // Test with DeepSeek for comparison
  const { default: IAEngine } = await import('./src/DRS/services/IAEngine.js');
  const iaEngine = new IAEngine();

  const keyScenarios = realScenarios.slice(0, 2); // Test 2 scenarios with DeepSeek

  for (const scenario of keyScenarios) {
    try {
      console.log(`šŸ“‹ Testing with DeepSeek: ${scenario.name}`);

      // Only text scenarios have a DeepSeek prompt; other types are skipped.
      if (scenario.type === 'text') {
        // Direct test with IAEngine to force DeepSeek
        const result = await iaEngine.validateEducationalContent(
          `Evaluate this text comprehension response. \nText: "${scenario.text}" Student answer: "${scenario.userAnswer}" Rate from 0-100 and provide feedback.`,
          {
            preferredProvider: 'deepseek',
            language: 'en',
            exerciseType: 'text-analysis'
          }
        );

        const testResult = {
          scenario: scenario.name,
          provider: result.provider,
          hasContent: !!result.content,
          // Success means the requested provider actually answered.
          success: result.provider === 'deepseek'
        };

        results.deepseek.tests.push(testResult);
        console.log(` āœ… Provider: ${result.provider}, Has Content: ${testResult.hasContent}`);
      }
    } catch (error) {
      console.log(` āŒ DeepSeek failed: ${error.message}`);
      results.deepseek.errors.push({ scenario: scenario.name, error: error.message });
    } finally {
      await pause(DEEPSEEK_CALL_DELAY_MS);
    }
  }

  printValidationReport(results);

  return results;
}

// Execute final validation
finalValidationTest().catch(error => {
  console.error('āŒ Final validation failed:', error);
  process.exit(1);
});