Class_generator/tests/ai-validation/test-final-validation.js
StillHammer f5cef0c913 Add comprehensive testing suite with UI/UX and E2E integration tests
- Create complete integration test system (test-integration.js)
- Add UI/UX interaction testing with real event simulation (test-uiux-integration.js)
- Implement end-to-end scenario testing for user journeys (test-e2e-scenarios.js)
- Add console testing commands for rapid development testing (test-console-commands.js)
- Create comprehensive test guide documentation (TEST-GUIDE.md)
- Integrate test buttons in debug panel (F12 → 3 test types)
- Add vocabulary modal two-progress-bar system integration
- Fix flashcard retry system for "don't know" cards
- Update IntelligentSequencer for task distribution validation

🧪 Testing Coverage:
- 35+ integration tests (architecture/modules)
- 20+ UI/UX tests (real user interactions)
- 5 E2E scenarios (complete user journeys)
- Console commands for rapid testing
- Debug panel integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-28 23:04:38 +08:00

195 lines
8.1 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * FINAL VALIDATION TEST - REAL CONTENT WITH BOTH PROVIDERS
 * Test everything that matters for production use.
 *
 * Runs a fixed list of scenarios (text comprehension, grammar, translation)
 * through an LLMValidator, then re-runs the text scenarios among the first two
 * through an IAEngine with `preferredProvider: 'deepseek'`. Progress and a
 * final report are written to the console.
 *
 * Called with no arguments it loads LLMValidator and IAEngine from
 * ./src/DRS/services/ and uses the default pauses between calls.
 *
 * @param {object} [options] - Optional overrides, mainly for dry runs.
 * @param {object} [options.llmValidator] - Object providing
 *   validateTextComprehension, validateGrammar and validateTranslation.
 *   When omitted, a new LLMValidator is created from the dynamic import.
 * @param {object} [options.iaEngine] - Object providing
 *   validateEducationalContent. When omitted, a new IAEngine is created
 *   from the dynamic import.
 * @param {number} [options.openaiDelayMs=1500] - Pause after each call in the first pass.
 * @param {number} [options.deepseekDelayMs=2000] - Pause after each call in the DeepSeek pass.
 * @returns {Promise<object>} `{ openai: { tests, errors }, deepseek: { tests, errors },
 *   summary: { totalTests, passedTests } }`.
 * @throws Whatever the dynamic imports or constructors throw; per-scenario
 *   failures are caught and recorded in the matching `errors` array.
 */
async function finalValidationTest(options = {}) {
  const { openaiDelayMs = 1500, deepseekDelayMs = 2000 } = options;

  console.log('🎯 FINAL VALIDATION TEST - PRODUCTION SCENARIOS\n');
  console.log('===========================================\n');

  let llmValidator = options.llmValidator;
  if (!llmValidator) {
    const { default: LLMValidator } = await import('./src/DRS/services/LLMValidator.js');
    llmValidator = new LLMValidator();
  }

  // Real educational content for testing
  const realScenarios = [
    {
      name: 'Text Comprehension - Good Answer',
      text: 'Climate change refers to long-term shifts in global temperatures and weather patterns. While climate change is a natural phenomenon, scientific evidence shows that human activities since the 1800s have been the main driver of climate change.',
      userAnswer: 'Climate change is caused by human activities since the 1800s and affects global temperatures and weather.',
      expectedScore: 'high',
      type: 'text'
    },
    {
      name: 'Text Comprehension - Poor Answer',
      text: 'Climate change refers to long-term shifts in global temperatures and weather patterns.',
      userAnswer: 'Cats are fluffy animals',
      expectedScore: 'low',
      type: 'text'
    },
    {
      name: 'Grammar - Correct',
      original: 'I am going to the store',
      userCorrection: 'I am going to the store',
      expectedScore: 'high',
      type: 'grammar'
    },
    {
      name: 'Grammar - Needs Work',
      original: 'I are going to store',
      userCorrection: 'I are going to store',
      expectedScore: 'low',
      type: 'grammar'
    },
    {
      name: 'Translation - Excellent',
      original: 'Good morning',
      translation: 'Bonjour',
      fromLang: 'en',
      toLang: 'fr',
      expectedScore: 'high',
      type: 'translation'
    }
  ];

  const results = {
    openai: { tests: [], errors: [] },
    deepseek: { tests: [], errors: [] },
    summary: { totalTests: 0, passedTests: 0 }
  };

  console.log('1⃣ TESTING WITH OPENAI (default provider)\n');
  for (const scenario of realScenarios) {
    // Count every attempted scenario, including the ones that throw, so the
    // success rate and the 70% threshold below cannot be inflated by failures.
    results.summary.totalTests++;
    try {
      console.log(`📋 Testing: ${scenario.name}`);
      let result;
      if (scenario.type === 'text') {
        result = await llmValidator.validateTextComprehension(
          scenario.text,
          scenario.userAnswer,
          { language: 'en', level: 'intermediate' }
        );
      } else if (scenario.type === 'grammar') {
        result = await llmValidator.validateGrammar(
          scenario.original,
          { userCorrection: scenario.userCorrection }
        );
      } else if (scenario.type === 'translation') {
        result = await llmValidator.validateTranslation(
          scenario.original,
          scenario.translation,
          { fromLang: scenario.fromLang, toLang: scenario.toLang }
        );
      } else {
        throw new Error(`Unknown scenario type: ${scenario.type}`);
      }

      const testResult = {
        scenario: scenario.name,
        provider: result.provider,
        score: result.score,
        expectedScore: scenario.expectedScore,
        scoreAppropriate: scenario.expectedScore === 'high' ? result.score > 70 : result.score < 50,
        hasFeedback: !!result.feedback,
        // A score of 0 is a valid (and, for poor answers, expected) outcome,
        // so check for a numeric score instead of truthiness.
        success: hasNumericScore(result.score)
      };
      results.openai.tests.push(testResult);
      if (testResult.success && testResult.scoreAppropriate) results.summary.passedTests++;
      console.log(`   ✅ Provider: ${result.provider}, Score: ${result.score}, Appropriate: ${testResult.scoreAppropriate}`);
    } catch (error) {
      console.log(`   ❌ Failed: ${error.message}`);
      results.openai.errors.push({ scenario: scenario.name, error: error.message });
    } finally {
      // Wait between calls to avoid rate limiting - also after a failed call.
      await waitMs(openaiDelayMs);
    }
  }

  console.log('\n2⃣ TESTING WITH DEEPSEEK (forced provider)\n');
  // Test with DeepSeek for comparison
  let iaEngine = options.iaEngine;
  if (!iaEngine) {
    const { default: IAEngine } = await import('./src/DRS/services/IAEngine.js');
    iaEngine = new IAEngine();
  }

  const keyScenarios = realScenarios.slice(0, 2); // Test 2 scenarios with DeepSeek
  for (const scenario of keyScenarios) {
    try {
      console.log(`📋 Testing with DeepSeek: ${scenario.name}`);
      if (scenario.type === 'text') {
        // Direct test with IAEngine to force DeepSeek
        const result = await iaEngine.validateEducationalContent(
          `Evaluate this text comprehension response. Text: "${scenario.text}" Student answer: "${scenario.userAnswer}" Rate from 0-100 and provide feedback.`,
          {
            preferredProvider: 'deepseek',
            language: 'en',
            exerciseType: 'text-analysis'
          }
        );
        const testResult = {
          scenario: scenario.name,
          provider: result.provider,
          hasContent: !!result.content,
          // Only counts as a success when the answer really came from DeepSeek.
          success: result.provider === 'deepseek'
        };
        results.deepseek.tests.push(testResult);
        console.log(`   ✅ Provider: ${result.provider}, Has Content: ${testResult.hasContent}`);
      }
    } catch (error) {
      console.log(`   ❌ DeepSeek failed: ${error.message}`);
      results.deepseek.errors.push({ scenario: scenario.name, error: error.message });
    } finally {
      await waitMs(deepseekDelayMs);
    }
  }

  console.log('\n📊 FINAL VALIDATION RESULTS:');
  console.log('=============================');
  console.log('\n🤖 OpenAI Results:');
  console.log(`   Total tests: ${results.openai.tests.length}`);
  console.log(`   Successful: ${results.openai.tests.filter(t => t.success).length}`);
  console.log(`   Appropriate scoring: ${results.openai.tests.filter(t => t.scoreAppropriate).length}`);
  console.log(`   Errors: ${results.openai.errors.length}`);
  console.log('\n🤖 DeepSeek Results:');
  console.log(`   Total tests: ${results.deepseek.tests.length}`);
  console.log(`   Successful: ${results.deepseek.tests.filter(t => t.success).length}`);
  console.log(`   Errors: ${results.deepseek.errors.length}`);

  // Guard the division so an empty run prints 0.0% rather than NaN%.
  const successRate = results.summary.totalTests > 0
    ? (results.summary.passedTests / results.summary.totalTests) * 100
    : 0;
  console.log('\n🎯 Overall Summary:');
  console.log(`   Total scenarios tested: ${results.summary.totalTests}`);
  console.log(`   Passed with appropriate scoring: ${results.summary.passedTests}`);
  console.log(`   Success rate: ${successRate.toFixed(1)}%`);

  // Show specific results for debugging
  console.log('\n📋 Detailed Results:');
  results.openai.tests.forEach(test => {
    const status = test.success && test.scoreAppropriate ? '✅' : '❌';
    console.log(`   ${status} ${test.scenario}: Score ${test.score} (expected ${test.expectedScore})`);
  });

  // Operational = more than 70% of attempted scenarios passed AND at least
  // one answer was actually served by DeepSeek.
  const allSystemsWorking = results.summary.passedTests > results.summary.totalTests * 0.7 &&
    results.deepseek.tests.some(t => t.success);
  console.log('\n🚀 SYSTEM STATUS:');
  console.log(`   AI Integration: ${allSystemsWorking ? 'FULLY OPERATIONAL' : 'NEEDS ATTENTION'}`);
  console.log(`   OpenAI: ${results.openai.tests.length > 0 ? 'WORKING' : 'FAILED'}`);
  console.log(`   DeepSeek: ${results.deepseek.tests.some(t => t.success) ? 'WORKING' : 'FAILED'}`);
  console.log(`   Fallback System: ${results.openai.tests.length > 0 && results.deepseek.tests.length > 0 ? 'CONFIGURED' : 'NOT TESTED'}`);
  return results;
}

/**
 * Tells whether a validator returned a usable numeric score.
 * Accepts finite numbers (including 0) and non-empty numeric strings.
 */
function hasNumericScore(score) {
  if (typeof score === 'number') return Number.isFinite(score);
  return typeof score === 'string' && score.trim() !== '' && Number.isFinite(Number(score));
}

/** Resolves after `ms` milliseconds; resolves immediately when `ms` is not positive. */
function waitMs(ms) {
  if (!(ms > 0)) return Promise.resolve();
  return new Promise(resolve => setTimeout(resolve, ms));
}
// Kick off the validation run when the script is loaded. Any rejection that
// escapes finalValidationTest is logged and turned into a non-zero exit code.
const reportFatalValidationError = (failure) => {
  console.error('❌ Final validation failed:', failure);
  process.exit(1);
};

finalValidationTest().catch(reportFatalValidationError);