- Create complete integration test system (test-integration.js)
- Add UI/UX interaction testing with real event simulation (test-uiux-integration.js)
- Implement end-to-end scenario testing for user journeys (test-e2e-scenarios.js)
- Add console testing commands for rapid development testing (test-console-commands.js)
- Create comprehensive test guide documentation (TEST-GUIDE.md)
- Integrate test buttons in debug panel (F12 → 3 test types)
- Add vocabulary modal two-progress-bar system integration
- Fix flashcard retry system for "don't know" cards
- Update IntelligentSequencer for task distribution validation

🧪 Testing Coverage:
- 35+ integration tests (architecture/modules)
- 20+ UI/UX tests (real user interactions)
- 5 E2E scenarios (complete user journeys)
- Console commands for rapid testing
- Debug panel integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
/**
 * FINAL VALIDATION TEST - REAL CONTENT WITH BOTH PROVIDERS
 * Test everything that matters for production use
 */

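// NOTE: this script assumes API credentials for both providers (OpenAI and
// DeepSeek) are already configured for LLMValidator / IAEngine; without them
// the live validation calls below will fail.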
async function finalValidationTest() {
  console.log('🎯 FINAL VALIDATION TEST - PRODUCTION SCENARIOS\n');
  console.log('===========================================\n');

  const { default: LLMValidator } = await import('./src/DRS/services/LLMValidator.js');
  const llmValidator = new LLMValidator();

  // Real educational content for testing
  const realScenarios = [
    {
      name: 'Text Comprehension - Good Answer',
      text: 'Climate change refers to long-term shifts in global temperatures and weather patterns. While climate change is a natural phenomenon, scientific evidence shows that human activities since the 1800s have been the main driver of climate change.',
      userAnswer: 'Climate change is caused by human activities since the 1800s and affects global temperatures and weather.',
      expectedScore: 'high',
      type: 'text'
    },
    {
      name: 'Text Comprehension - Poor Answer',
      text: 'Climate change refers to long-term shifts in global temperatures and weather patterns.',
      userAnswer: 'Cats are fluffy animals',
      expectedScore: 'low',
      type: 'text'
    },
    {
      name: 'Grammar - Correct',
      original: 'I am going to the store',
      userCorrection: 'I am going to the store',
      expectedScore: 'high',
      type: 'grammar'
    },
    {
      name: 'Grammar - Needs Work',
      original: 'I are going to store',
      userCorrection: 'I are going to store',
      expectedScore: 'low',
      type: 'grammar'
    },
    {
      name: 'Translation - Excellent',
      original: 'Good morning',
      translation: 'Bonjour',
      fromLang: 'en',
      toLang: 'fr',
      expectedScore: 'high',
      type: 'translation'
    }
  ];

  const results = {
    openai: { tests: [], errors: [] },
    deepseek: { tests: [], errors: [] },
    summary: { totalTests: 0, passedTests: 0 }
  };

  console.log('1️⃣ TESTING WITH OPENAI (default provider)\n');

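  // Each scenario is run through the matching LLMValidator method and judged
  // with simple heuristics: a result counts as appropriately scored when an
  // expected 'high' answer scores above 70 and an expected 'low' answer
  // scores below 50.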
  for (const scenario of realScenarios) {
    try {
      console.log(`📋 Testing: ${scenario.name}`);
      let result;

      if (scenario.type === 'text') {
        result = await llmValidator.validateTextComprehension(
          scenario.text,
          scenario.userAnswer,
          { language: 'en', level: 'intermediate' }
        );
      } else if (scenario.type === 'grammar') {
        result = await llmValidator.validateGrammar(
          scenario.original,
          { userCorrection: scenario.userCorrection }
        );
      } else if (scenario.type === 'translation') {
        result = await llmValidator.validateTranslation(
          scenario.original,
          scenario.translation,
          { fromLang: scenario.fromLang, toLang: scenario.toLang }
        );
      }

      const testResult = {
        scenario: scenario.name,
        provider: result.provider,
        score: result.score,
        expectedScore: scenario.expectedScore,
        scoreAppropriate: scenario.expectedScore === 'high' ? result.score > 70 : result.score < 50,
        hasFeedback: !!result.feedback,
        success: !!result.score
      };

      results.openai.tests.push(testResult);
      results.summary.totalTests++;
      if (testResult.success && testResult.scoreAppropriate) results.summary.passedTests++;

      console.log(`   ✅ Provider: ${result.provider}, Score: ${result.score}, Appropriate: ${testResult.scoreAppropriate}`);

      // Wait between calls to avoid rate limiting
      await new Promise(resolve => setTimeout(resolve, 1500));

    } catch (error) {
      console.log(`   ❌ Failed: ${error.message}`);
      results.openai.errors.push({ scenario: scenario.name, error: error.message });
    }
  }

  console.log('\n2️⃣ TESTING WITH DEEPSEEK (forced provider)\n');

  // Test with DeepSeek for comparison
  const { default: IAEngine } = await import('./src/DRS/services/IAEngine.js');
  const iaEngine = new IAEngine();

  const keyScenarios = realScenarios.slice(0, 2); // Test 2 scenarios with DeepSeek

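  // Note: only 'text' scenarios are exercised against DeepSeek below. The two
  // selected scenarios are both text comprehension; any other type would be
  // skipped by the check inside the loop.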
  for (const scenario of keyScenarios) {
    try {
      console.log(`📋 Testing with DeepSeek: ${scenario.name}`);

      if (scenario.type === 'text') {
        // Direct test with IAEngine to force DeepSeek
        const result = await iaEngine.validateEducationalContent(
          `Evaluate this text comprehension response. Text: "${scenario.text}" Student answer: "${scenario.userAnswer}" Rate from 0-100 and provide feedback.`,
          {
            preferredProvider: 'deepseek',
            language: 'en',
            exerciseType: 'text-analysis'
          }
        );

        const testResult = {
          scenario: scenario.name,
          provider: result.provider,
          hasContent: !!result.content,
          success: result.provider === 'deepseek'
        };

        results.deepseek.tests.push(testResult);
        console.log(`   ✅ Provider: ${result.provider}, Has Content: ${testResult.hasContent}`);
      }

      await new Promise(resolve => setTimeout(resolve, 2000));

    } catch (error) {
      console.log(`   ❌ DeepSeek failed: ${error.message}`);
      results.deepseek.errors.push({ scenario: scenario.name, error: error.message });
    }
  }

  console.log('\n📊 FINAL VALIDATION RESULTS:');
  console.log('=============================');

  console.log('\n🤖 OpenAI Results:');
  console.log(`   Total tests: ${results.openai.tests.length}`);
  console.log(`   Successful: ${results.openai.tests.filter(t => t.success).length}`);
  console.log(`   Appropriate scoring: ${results.openai.tests.filter(t => t.scoreAppropriate).length}`);
  console.log(`   Errors: ${results.openai.errors.length}`);

  console.log('\n🤖 DeepSeek Results:');
  console.log(`   Total tests: ${results.deepseek.tests.length}`);
  console.log(`   Successful: ${results.deepseek.tests.filter(t => t.success).length}`);
  console.log(`   Errors: ${results.deepseek.errors.length}`);

  console.log('\n🎯 Overall Summary:');
  console.log(`   Total scenarios tested: ${results.summary.totalTests}`);
  console.log(`   Passed with appropriate scoring: ${results.summary.passedTests}`);
  console.log(`   Success rate: ${((results.summary.passedTests / results.summary.totalTests) * 100).toFixed(1)}%`);

  // Show specific results for debugging
  console.log('\n📋 Detailed Results:');
  results.openai.tests.forEach(test => {
    const status = test.success && test.scoreAppropriate ? '✅' : '❌';
    console.log(`   ${status} ${test.scenario}: Score ${test.score} (expected ${test.expectedScore})`);
  });

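  // Overall health check: the run passes when more than 70% of the OpenAI
  // scenarios were scored in the expected range and at least one DeepSeek
  // call succeeded.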
  const allSystemsWorking = results.summary.passedTests > results.summary.totalTests * 0.7 &&
                            results.deepseek.tests.some(t => t.success);

  console.log('\n🚀 SYSTEM STATUS:');
  console.log(`   AI Integration: ${allSystemsWorking ? 'FULLY OPERATIONAL' : 'NEEDS ATTENTION'}`);
  console.log(`   OpenAI: ${results.openai.tests.length > 0 ? 'WORKING' : 'FAILED'}`);
  console.log(`   DeepSeek: ${results.deepseek.tests.some(t => t.success) ? 'WORKING' : 'FAILED'}`);
  console.log(`   Fallback System: ${results.openai.tests.length > 0 && results.deepseek.tests.length > 0 ? 'CONFIGURED' : 'NOT TESTED'}`);

  return results;
}

// Execute final validation
finalValidationTest().catch(error => {
  console.error('❌ Final validation failed:', error);
  process.exit(1);
});
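
// Usage sketch (assumptions: this file sits at the project root alongside src/,
// so the relative './src/...' imports resolve, Node is recent enough for dynamic
// import(), and provider credentials are configured). The file name below is
// illustrative, not the actual one:
//   node test-final-validation.js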