/**
 * COMPREHENSIVE OUTPUT TESTING
 * Test ALL modes, ALL exercise types, BOTH providers, CORRECT vs WRONG answers
 */
import IAEngine from './src/DRS/services/IAEngine.js';

async function testAllOutputs() {
  console.log('🎯 COMPREHENSIVE OUTPUT TESTING - ALL MODES, ALL TYPES\n');
  console.log('===============================================\n');

  // Result keys match scenario types with the hyphen stripped ('text-analysis' -> 'textanalysis')
  const results = {
    textanalysis: { openai: {}, deepseek: {} },
    grammar: { openai: {}, deepseek: {} },
    translation: { openai: {}, deepseek: {} },
    summary: { passed: 0, total: 0 }
  };

  const engine = new IAEngine({ defaultProvider: 'openai', fallbackProviders: ['deepseek'] });
  await new Promise(resolve => setTimeout(resolve, 1000)); // Brief pause before the first request

  // Test scenarios: correct vs wrong answers
  const testScenarios = [
    {
      type: 'text-analysis',
      text: 'The Amazon rainforest is the largest tropical rainforest in the world.',
      correct: 'Amazon is the biggest rainforest',
      wrong: 'Elephants are purple animals'
    },
    {
      type: 'grammar',
      original: 'I are going to school',
      correct: 'I am going to school',
      wrong: 'I are going to school'
    },
    {
      type: 'translation',
      original: 'Good morning',
      correct: 'Bonjour',
      wrong: 'Pizza spaghetti'
    }
  ];

  for (const scenario of testScenarios) {
    console.log(`\n🧪 TESTING ${scenario.type.toUpperCase()}\n`);
    const resultKey = scenario.type.replace('-', '');

    // Test with OpenAI
    console.log('1️⃣ OpenAI Provider Tests:');
    try {
      // Test CORRECT answer
      let result = await testExerciseType(engine, scenario, 'correct', 'openai');
      results[resultKey].openai.correct = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score > 70,
        feedback: !!result.feedback
      };
      await new Promise(resolve => setTimeout(resolve, 2000)); // Rate limiting

      // Test WRONG answer
      result = await testExerciseType(engine, scenario, 'wrong', 'openai');
      results[resultKey].openai.wrong = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score < 50,
        feedback: !!result.feedback
      };

      console.log(`✅ OpenAI ${scenario.type}: Correct=${results[resultKey].openai.correct.score}, Wrong=${results[resultKey].openai.wrong.score}`);
    } catch (error) {
      console.log(`❌ OpenAI ${scenario.type} failed:`, error.message);
    }
    await new Promise(resolve => setTimeout(resolve, 3000)); // Rate limiting

    // Test with DeepSeek
    console.log('\n2️⃣ DeepSeek Provider Tests:');
    try {
      // Test CORRECT answer
      let result = await testExerciseType(engine, scenario, 'correct', 'deepseek');
      results[resultKey].deepseek.correct = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score > 70,
        feedback: !!result.feedback
      };
      await new Promise(resolve => setTimeout(resolve, 3000)); // Rate limiting

      // Test WRONG answer
      result = await testExerciseType(engine, scenario, 'wrong', 'deepseek');
      results[resultKey].deepseek.wrong = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score < 50,
        feedback: !!result.feedback
      };

      console.log(`✅ DeepSeek ${scenario.type}: Correct=${results[resultKey].deepseek.correct.score}, Wrong=${results[resultKey].deepseek.wrong.score}`);
    } catch (error) {
      console.log(`❌ DeepSeek ${scenario.type} failed:`, error.message);
    }
    await new Promise(resolve => setTimeout(resolve, 2000)); // Rate limiting
  }

  // ANALYSIS OF RESULTS
  console.log('\n📊 COMPREHENSIVE RESULTS ANALYSIS:');
  console.log('=====================================\n');

  Object.keys(results).forEach(type => {
    if (type === 'summary') return;
    console.log(`\n🧪 ${type.toUpperCase()}:`);
    ['openai', 'deepseek'].forEach(provider => {
      if (results[type][provider].correct && results[type][provider].wrong) {
        const correct = results[type][provider].correct;
        const wrong = results[type][provider].wrong;
        console.log(`  ${provider.toUpperCase()}:`);
        console.log(`    ✅ Correct: ${correct.score} (should be >70: ${correct.appropriate ? 'YES' : 'NO'})`);
        console.log(`    ❌ Wrong: ${wrong.score} (should be <50: ${wrong.appropriate ? 'YES' : 'NO'})`);
        console.log(`    📝 Feedback: ${correct.feedback ? 'YES' : 'NO'}`);

        // Count passed tests
        if (correct.appropriate && wrong.appropriate && correct.feedback) {
          results.summary.passed++;
        }
        results.summary.total++;
      }
    });
  });

  // FINAL VERDICT
  console.log('\n🎯 FINAL VERDICT:');
  console.log('=================');
  console.log(`Passed tests: ${results.summary.passed}/${results.summary.total}`);
  const successRate = results.summary.total > 0
    ? ((results.summary.passed / results.summary.total) * 100).toFixed(1)
    : '0.0'; // Avoid NaN when every provider call failed
  console.log(`Success rate: ${successRate}%`);

  if (results.summary.total > 0 && results.summary.passed === results.summary.total) {
    console.log('🎉 ALL OUTPUTS SATISFACTORY!');
    console.log('✅ Correct answers get high scores');
    console.log('✅ Wrong answers get low scores');
    console.log('✅ Both providers work correctly');
    console.log('✅ All exercise types validated');
  } else {
    console.log('⚠️ SOME OUTPUTS NEED ATTENTION');
    console.log('Check scoring logic or provider responses');
  }

  return results;
}

async function testExerciseType(engine, scenario, answerType, provider) {
  const answer = scenario[answerType];
  switch (scenario.type) {
    case 'text-analysis':
      return await engine.validateComprehension(scenario.text, answer, {
        preferredProvider: provider,
        exerciseType: 'text'
      });
    case 'grammar':
      return await engine.validateGrammar(answer, {
        preferredProvider: provider,
        grammarConcepts: {},
        languageLevel: 'beginner'
      });
    case 'translation':
      return await engine.validateTranslation(scenario.original, answer, {
        preferredProvider: provider,
        fromLang: 'en',
        toLang: 'fr'
      });
  }
}

testAllOutputs().catch(console.error);