Class_generator/tests/ai-validation/test-all-outputs.js
StillHammer f5cef0c913 Add comprehensive testing suite with UI/UX and E2E integration tests
- Create complete integration test system (test-integration.js)
- Add UI/UX interaction testing with real event simulation (test-uiux-integration.js)
- Implement end-to-end scenario testing for user journeys (test-e2e-scenarios.js)
- Add console testing commands for rapid development testing (test-console-commands.js)
- Create comprehensive test guide documentation (TEST-GUIDE.md)
- Integrate test buttons in debug panel (F12 → 3 test types)
- Add vocabulary modal two-progress-bar system integration
- Fix flashcard retry system for "don't know" cards
- Update IntelligentSequencer for task distribution validation

🧪 Testing Coverage:
- 35+ integration tests (architecture/modules)
- 20+ UI/UX tests (real user interactions)
- 5 E2E scenarios (complete user journeys)
- Console commands for rapid testing
- Debug panel integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-28 23:04:38 +08:00

/**
 * COMPREHENSIVE OUTPUT TESTING
 * Test ALL modes, ALL exercise types, BOTH providers, CORRECT vs WRONG answers
 */
import { default as IAEngine } from './src/DRS/services/IAEngine.js';
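
// A rough usage sketch (assumptions: Node 18+ with ES modules, run from a
// directory where the IAEngine import above resolves, and provider API
// credentials configured however IAEngine expects them, e.g. environment
// variables):
//
//   node test-all-outputs.js
//
// The setTimeout pauses below are deliberate rate-limiting delays between
// provider calls, so a full pass over all scenarios takes a few minutes.
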
async function testAllOutputs() {
  console.log('🎯 COMPREHENSIVE OUTPUT TESTING - ALL MODES, ALL TYPES\n');
  console.log('===============================================\n');

  const results = {
    textanalysis: { openai: {}, deepseek: {} },
    grammar: { openai: {}, deepseek: {} },
    translation: { openai: {}, deepseek: {} },
    summary: { passed: 0, total: 0 }
  };

  const engine = new IAEngine({
    defaultProvider: 'openai',
    fallbackProviders: ['deepseek']
  });

  // Short startup delay before issuing the first request
  await new Promise(resolve => setTimeout(resolve, 1000));
  // Test scenarios: correct vs wrong answers
  const testScenarios = [
    {
      type: 'text-analysis',
      text: 'The Amazon rainforest is the largest tropical rainforest in the world.',
      correct: 'Amazon is the biggest rainforest',
      wrong: 'Elephants are purple animals'
    },
    {
      type: 'grammar',
      original: 'I are going to school',
      correct: 'I am going to school',
      wrong: 'I are going to school'
    },
    {
      type: 'translation',
      original: 'Good morning',
      correct: 'Bonjour',
      wrong: 'Pizza spaghetti'
    }
  ];
  for (const scenario of testScenarios) {
    console.log(`\n🧪 TESTING ${scenario.type.toUpperCase()}\n`);
    const key = scenario.type.replace('-', '');

    // Test with OpenAI
    console.log('1️⃣ OpenAI Provider Tests:');
    try {
      // Test CORRECT answer
      let result = await testExerciseType(engine, scenario, 'correct', 'openai');
      results[key].openai.correct = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score > 70,
        feedback: !!result.feedback
      };

      await new Promise(resolve => setTimeout(resolve, 2000)); // Rate limiting

      // Test WRONG answer
      result = await testExerciseType(engine, scenario, 'wrong', 'openai');
      results[key].openai.wrong = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score < 50,
        feedback: !!result.feedback
      };

      console.log(`✅ OpenAI ${scenario.type}: Correct=${results[key].openai.correct.score}, Wrong=${results[key].openai.wrong.score}`);
    } catch (error) {
      console.log(`❌ OpenAI ${scenario.type} failed:`, error.message);
    }

    await new Promise(resolve => setTimeout(resolve, 3000)); // Rate limiting

    // Test with DeepSeek
    console.log('\n2️⃣ DeepSeek Provider Tests:');
    try {
      // Test CORRECT answer
      let result = await testExerciseType(engine, scenario, 'correct', 'deepseek');
      results[key].deepseek.correct = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score > 70,
        feedback: !!result.feedback
      };

      await new Promise(resolve => setTimeout(resolve, 3000)); // Rate limiting

      // Test WRONG answer
      result = await testExerciseType(engine, scenario, 'wrong', 'deepseek');
      results[key].deepseek.wrong = {
        provider: result.provider,
        score: result.score,
        appropriate: result.score < 50,
        feedback: !!result.feedback
      };

      console.log(`✅ DeepSeek ${scenario.type}: Correct=${results[key].deepseek.correct.score}, Wrong=${results[key].deepseek.wrong.score}`);
    } catch (error) {
      console.log(`❌ DeepSeek ${scenario.type} failed:`, error.message);
    }

    await new Promise(resolve => setTimeout(resolve, 2000)); // Rate limiting
  }
  // ANALYSIS OF RESULTS
  console.log('\n📊 COMPREHENSIVE RESULTS ANALYSIS:');
  console.log('=====================================\n');

  Object.keys(results).forEach(type => {
    if (type === 'summary') return;

    console.log(`🧪 ${type.toUpperCase()}:`);
    ['openai', 'deepseek'].forEach(provider => {
      if (results[type][provider].correct && results[type][provider].wrong) {
        const correct = results[type][provider].correct;
        const wrong = results[type][provider].wrong;

        console.log(` ${provider.toUpperCase()}:`);
        console.log(` ✅ Correct: ${correct.score} (should be >70: ${correct.appropriate ? 'YES' : 'NO'})`);
        console.log(` ❌ Wrong: ${wrong.score} (should be <50: ${wrong.appropriate ? 'YES' : 'NO'})`);
        console.log(` 📝 Feedback: ${correct.feedback ? 'YES' : 'NO'}`);

        // Count passed tests
        if (correct.appropriate && wrong.appropriate && correct.feedback) {
          results.summary.passed++;
        }
        results.summary.total++;
      }
    });
  });
  // FINAL VERDICT
  console.log('\n🎯 FINAL VERDICT:');
  console.log('=================');
  console.log(`Passed tests: ${results.summary.passed}/${results.summary.total}`);
  console.log(`Success rate: ${((results.summary.passed / results.summary.total) * 100).toFixed(1)}%`);

  if (results.summary.passed === results.summary.total) {
    console.log('🎉 ALL OUTPUTS SATISFACTORY!');
    console.log('✅ Correct answers get high scores');
    console.log('✅ Wrong answers get low scores');
    console.log('✅ Both providers work correctly');
    console.log('✅ All exercise types validated');
  } else {
    console.log('⚠️ SOME OUTPUTS NEED ATTENTION');
    console.log('Check scoring logic or provider responses');
  }

  return results;
}
async function testExerciseType(engine, scenario, answerType, provider) {
  const answer = scenario[answerType];

  switch (scenario.type) {
    case 'text-analysis':
      return await engine.validateComprehension(scenario.text, answer, {
        preferredProvider: provider,
        exerciseType: 'text'
      });

    case 'grammar':
      return await engine.validateGrammar(answer, {
        preferredProvider: provider,
        grammarConcepts: {},
        languageLevel: 'beginner'
      });

    case 'translation':
      return await engine.validateTranslation(scenario.original, answer, {
        preferredProvider: provider,
        fromLang: 'en',
        toLang: 'fr'
      });

    default:
      // Fail loudly instead of returning undefined for an unknown scenario type
      throw new Error(`Unknown exercise type: ${scenario.type}`);
  }
}
testAllOutputs().catch(console.error);