- Create complete integration test system (test-integration.js)
- Add UI/UX interaction testing with real event simulation (test-uiux-integration.js)
- Implement end-to-end scenario testing for user journeys (test-e2e-scenarios.js)
- Add console testing commands for rapid development testing (test-console-commands.js)
- Create comprehensive test guide documentation (TEST-GUIDE.md)
- Integrate test buttons in debug panel (F12 → 3 test types)
- Add vocabulary modal two-progress-bar system integration
- Fix flashcard retry system for "don't know" cards
- Update IntelligentSequencer for task distribution validation

🧪 Testing Coverage:
- 35+ integration tests (architecture/modules)
- 20+ UI/UX tests (real user interactions)
- 5 E2E scenarios (complete user journeys)
- Console commands for rapid testing
- Debug panel integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
/**
 * COMPREHENSIVE OUTPUT TESTING
 * Test ALL modes, ALL exercise types, BOTH providers, CORRECT vs WRONG answers
 */

import IAEngine from './src/DRS/services/IAEngine.js';
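
// Assumed contract, inferred from the calls below: IAEngine exposes
// validateComprehension(), validateGrammar() and validateTranslation(),
// each resolving to an object of the form { provider, score, feedback }.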

async function testAllOutputs() {
  console.log('🎯 COMPREHENSIVE OUTPUT TESTING - ALL MODES, ALL TYPES\n');
  console.log('===============================================\n');

  const results = {
    textanalysis: { openai: {}, deepseek: {} },
    grammar: { openai: {}, deepseek: {} },
    translation: { openai: {}, deepseek: {} },
    summary: { passed: 0, total: 0 }
  };
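
  // The buckets above are keyed by scenario.type with the hyphen stripped
  // ('text-analysis' → 'textanalysis'), matching the scenario.type.replace('-', '')
  // lookup in the test loop below.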

  const engine = new IAEngine({
    defaultProvider: 'openai',
    fallbackProviders: ['deepseek']
  });
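
  // Brief pause before the first request, presumably so asynchronous provider
  // initialisation inside IAEngine can settle (assumption; internals not shown here).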
  await new Promise(resolve => setTimeout(resolve, 1000));

  // Test scenarios: correct vs wrong answers
  const testScenarios = [
    {
      type: 'text-analysis',
      text: 'The Amazon rainforest is the largest tropical rainforest in the world.',
      correct: 'Amazon is the biggest rainforest',
      wrong: 'Elephants are purple animals'
    },
    {
      type: 'grammar',
      original: 'I are going to school',
      correct: 'I am going to school',
      wrong: 'I are going to school'
    },
    {
      type: 'translation',
      original: 'Good morning',
      correct: 'Bonjour',
      wrong: 'Pizza spaghetti'
    }
  ];
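
  // Scoring expectations used below: a correct answer should score above 70
  // and a wrong answer below 50; the `appropriate` flags encode these thresholds.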

  for (const scenario of testScenarios) {
    console.log(`\n🧪 TESTING ${scenario.type.toUpperCase()}\n`);

    const bucket = results[scenario.type.replace('-', '')];

    // Test with OpenAI
    console.log('1️⃣ OpenAI Provider Tests:');
    try {
      // Test CORRECT answer
      let result = await testExerciseType(engine, scenario, 'correct', 'openai');
      bucket.openai.correct = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score > 70,
        feedback: !!result.feedback
      };

      await new Promise(resolve => setTimeout(resolve, 2000)); // Rate limiting

      // Test WRONG answer
      result = await testExerciseType(engine, scenario, 'wrong', 'openai');
      bucket.openai.wrong = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score < 50,
        feedback: !!result.feedback
      };

      console.log(`✅ OpenAI ${scenario.type}: Correct=${bucket.openai.correct.score}, Wrong=${bucket.openai.wrong.score}`);
    } catch (error) {
      console.log(`❌ OpenAI ${scenario.type} failed:`, error.message);
    }

    await new Promise(resolve => setTimeout(resolve, 3000)); // Rate limiting

    // Test with DeepSeek
    console.log('\n2️⃣ DeepSeek Provider Tests:');
    try {
      // Test CORRECT answer
      let result = await testExerciseType(engine, scenario, 'correct', 'deepseek');
      bucket.deepseek.correct = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score > 70,
        feedback: !!result.feedback
      };

      await new Promise(resolve => setTimeout(resolve, 3000)); // Rate limiting

      // Test WRONG answer
      result = await testExerciseType(engine, scenario, 'wrong', 'deepseek');
      bucket.deepseek.wrong = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score < 50,
        feedback: !!result.feedback
      };

      console.log(`✅ DeepSeek ${scenario.type}: Correct=${bucket.deepseek.correct.score}, Wrong=${bucket.deepseek.wrong.score}`);
    } catch (error) {
      console.log(`❌ DeepSeek ${scenario.type} failed:`, error.message);
    }

    await new Promise(resolve => setTimeout(resolve, 2000));
  }

  // ANALYSIS OF RESULTS
  console.log('\n📊 COMPREHENSIVE RESULTS ANALYSIS:');
  console.log('=====================================\n');

  Object.keys(results).forEach(type => {
    if (type === 'summary') return;

    console.log(`🧪 ${type.toUpperCase()}:`);

    ['openai', 'deepseek'].forEach(provider => {
      if (results[type][provider].correct && results[type][provider].wrong) {
        const correct = results[type][provider].correct;
        const wrong = results[type][provider].wrong;

        console.log(`  ${provider.toUpperCase()}:`);
        console.log(`    ✅ Correct: ${correct.score} (should be >70: ${correct.appropriate ? 'YES' : 'NO'})`);
        console.log(`    ❌ Wrong: ${wrong.score} (should be <50: ${wrong.appropriate ? 'YES' : 'NO'})`);
        console.log(`    📝 Feedback: ${correct.feedback ? 'YES' : 'NO'}`);

        // Count passed tests
        if (correct.appropriate && wrong.appropriate && correct.feedback) {
          results.summary.passed++;
        }
        results.summary.total++;
      }
    });
  });
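
  // Note: summary.total only counts provider/type pairs where both the correct
  // and wrong runs completed, so API failures shrink the denominator rather
  // than registering as failed tests.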

  // FINAL VERDICT
  console.log('\n🎯 FINAL VERDICT:');
  console.log('=================');
  console.log(`Passed tests: ${results.summary.passed}/${results.summary.total}`);
  console.log(`Success rate: ${((results.summary.passed / results.summary.total) * 100).toFixed(1)}%`);

  if (results.summary.passed === results.summary.total) {
    console.log('🎉 ALL OUTPUTS SATISFACTORY!');
    console.log('✅ Correct answers get high scores');
    console.log('✅ Wrong answers get low scores');
    console.log('✅ Both providers work correctly');
    console.log('✅ All exercise types validated');
  } else {
    console.log('⚠️ SOME OUTPUTS NEED ATTENTION');
    console.log('Check scoring logic or provider responses');
  }

  return results;
}
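
// Dispatches one validation call for a scenario/answer/provider combination.
// `answerType` is 'correct' or 'wrong'; an unknown scenario.type falls through
// the switch and returns undefined.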
async function testExerciseType(engine, scenario, answerType, provider) {
  const answer = scenario[answerType];

  switch (scenario.type) {
    case 'text-analysis':
      return await engine.validateComprehension(scenario.text, answer, {
        preferredProvider: provider,
        exerciseType: 'text'
      });

    case 'grammar':
      return await engine.validateGrammar(answer, {
        preferredProvider: provider,
        grammarConcepts: {},
        languageLevel: 'beginner'
      });

    case 'translation':
      return await engine.validateTranslation(scenario.original, answer, {
        preferredProvider: provider,
        fromLang: 'en',
        toLang: 'fr'
      });
  }
}

testAllOutputs().catch(console.error);
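
// Usage sketch (filename illustrative; provider credentials are assumed to be
// configured wherever IAEngine reads them, which this file does not show):
//   node test-all-outputs.js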