Class_generator/tests/ai-validation/test-real-consistency.js
StillHammer f5cef0c913 Add comprehensive testing suite with UI/UX and E2E integration tests
- Create complete integration test system (test-integration.js)
- Add UI/UX interaction testing with real event simulation (test-uiux-integration.js)
- Implement end-to-end scenario testing for user journeys (test-e2e-scenarios.js)
- Add console testing commands for rapid development testing (test-console-commands.js)
- Create comprehensive test guide documentation (TEST-GUIDE.md)
- Integrate test buttons in debug panel (F12 → 3 test types)
- Add vocabulary modal two-progress-bar system integration
- Fix flashcard retry system for "don't know" cards
- Update IntelligentSequencer for task distribution validation

🧪 Testing Coverage:
- 35+ integration tests (architecture/modules)
- 20+ UI/UX tests (real user interactions)
- 5 E2E scenarios (complete user journeys)
- Console commands for rapid testing
- Debug panel integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-28 23:04:38 +08:00

180 lines
7.5 KiB
JavaScript

// Real-consistency test: run each case 10 times to observe the actual variance in scores
import { default as IAEngine } from './src/DRS/services/IAEngine.js';
/** Minimum share (in percent) of in-range scores for a case to count as consistent. */
const CONSISTENCY_THRESHOLD_PERCENT = 80;

/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Aggregate the raw outcomes of one test case into summary statistics.
 *
 * Only finite numeric scores are considered valid; anything else (the
 * 'ERROR' marker, undefined, NaN) is counted in `failedCount`.
 *
 * @param {Array<number|string|undefined>} scores - One entry per round.
 * @param {Array<string|undefined>} providers - Provider reported for each round.
 * @param {[number, number]} expectedRange - Inclusive [min, max] score range.
 * @returns {object} Stats object; numeric fields are 'N/A' when no round produced a valid score.
 */
function summarizeScores(scores, providers, [expectedMin, expectedMax]) {
  const validScores = scores.filter((score) => Number.isFinite(score));
  const validCount = validScores.length;
  const inRangeCount = validScores.filter(
    (score) => score >= expectedMin && score <= expectedMax,
  ).length;
  const hasValid = validCount > 0;
  const lowest = hasValid ? Math.min(...validScores) : 'N/A';
  const highest = hasValid ? Math.max(...validScores) : 'N/A';
  const total = validScores.reduce((sum, score) => sum + score, 0);

  return {
    scores,
    providers,
    validCount,
    average: hasValid ? Math.round(total / validCount) : 'N/A',
    min: lowest,
    max: highest,
    // Historical name: this is the spread (max - min), not a statistical variance.
    variance: hasValid ? highest - lowest : 'N/A',
    inRangeCount,
    // Kept as a one-decimal string (or 'N/A') for backward compatibility.
    consistency: hasValid ? ((inRangeCount / validCount) * 100).toFixed(1) : 'N/A',
    failedCount: scores.length - validCount,
  };
}

/**
 * Tell whether a case reached the consistency threshold.
 * A case without any valid score ('N/A') is never consistent.
 *
 * @param {{consistency: string}} stats - Stats produced by summarizeScores.
 * @returns {boolean}
 */
function isConsistent(stats) {
  return Number.parseFloat(stats.consistency) >= CONSISTENCY_THRESHOLD_PERCENT;
}

/**
 * Run every validation scenario several times and report how consistently the
 * returned scores fall inside the expected range.
 *
 * Called without arguments it behaves as before: a fresh IAEngine
 * (openai, falling back to deepseek), 10 rounds per case, 3 s between rounds.
 *
 * @param {object} [options]
 * @param {object|null} [options.engine=null] - Object exposing `validateComprehension`
 *   and `validateTranslation`; a new IAEngine is created when omitted.
 * @param {number} [options.iterations=10] - Rounds per test case.
 * @param {number} [options.delayMs=3000] - Pause after each round.
 * @param {number} [options.startupDelayMs=1000] - Pause before the first round.
 * @returns {Promise<Object<string, object>>} Stats per test case, keyed by case name.
 */
async function testRealConsistency({
  engine = null,
  iterations = 10,
  delayMs = 3000,
  startupDelayMs = 1000,
} = {}) {
  console.log(`🔄 TEST DE VRAIE CONSISTANCE - ${iterations} itérations par cas\n`);
  console.log('================================================\n');

  const iaEngine = engine ?? new IAEngine({
    defaultProvider: 'openai',
    fallbackProviders: ['deepseek'],
  });

  // NOTE(review): presumably gives the engine time to finish initialising — confirm.
  await sleep(startupDelayMs);

  // Each `test` is a thunk, so `timestamp: Date.now()` is re-evaluated on every
  // round and every call carries a different context object.
  const testCases = [
    {
      name: 'WRONG: Science -> Nonsense',
      test: () => iaEngine.validateComprehension(
        'Albert Einstein developed the theory of relativity in the early 20th century.',
        'Dancing unicorns eat rainbow cookies in space',
        { exerciseType: 'physics-comprehension', timestamp: Date.now() },
      ),
      expectedRange: [0, 30],
      type: 'WRONG',
    },
    {
      name: 'CORRECT: History understanding',
      test: () => iaEngine.validateComprehension(
        'World War II ended in 1945 when Japan surrendered after atomic bombs.',
        'World War 2 finished in 1945 when Japan gave up after nuclear attacks',
        { exerciseType: 'history-analysis', timestamp: Date.now() },
      ),
      expectedRange: [70, 100],
      type: 'CORRECT',
    },
    {
      name: 'WRONG: French translation nonsense',
      test: () => iaEngine.validateTranslation(
        'Where is the library?',
        'Elephant potato singing moon',
        { fromLang: 'en', toLang: 'fr', context: 'directions', timestamp: Date.now() },
      ),
      expectedRange: [0, 30],
      type: 'WRONG',
    },
    {
      name: 'CORRECT: Spanish translation',
      test: () => iaEngine.validateTranslation(
        'What time is it?',
        '¿Qué hora es?',
        { fromLang: 'en', toLang: 'es', context: 'time', timestamp: Date.now() },
      ),
      expectedRange: [70, 100],
      type: 'CORRECT',
    },
  ];

  const allResults = {};

  for (const testCase of testCases) {
    const [expectedMin, expectedMax] = testCase.expectedRange;
    console.log(`🧪 ${testCase.name} - Testing ${iterations} times`);
    console.log(` Expected: ${expectedMin}-${expectedMax} points\n`);

    const scores = [];
    const providers = [];

    for (let round = 1; round <= iterations; round++) {
      console.log(` Round ${round}/${iterations}...`);
      try {
        const { score, provider } = await testCase.test();
        const inRange = score >= expectedMin && score <= expectedMax;
        console.log(` Score: ${score} ${inRange ? '✅' : '❌'} (${provider})`);
        // Record only once the round fully succeeded, so each round adds
        // exactly one entry to each array.
        scores.push(score);
        providers.push(provider);
      } catch (error) {
        console.log(` ❌ Error: ${error.message}`);
        scores.push('ERROR');
        providers.push('ERROR');
      }
      // Space out requests to avoid provider rate limiting.
      await sleep(delayMs);
    }

    const stats = summarizeScores(scores, providers, testCase.expectedRange);
    allResults[testCase.name] = stats;

    console.log(`\n 📊 RÉSULTATS pour "${testCase.name}":`);
    console.log(` Scores: [${scores.join(', ')}]`);
    console.log(` Moyenne: ${stats.average}`);
    console.log(` Min-Max: ${stats.min}-${stats.max} (variance: ${stats.variance})`);
    console.log(` Dans la plage: ${stats.inRangeCount}/${stats.validCount} (${stats.consistency}%)`);
    if (stats.failedCount > 0) {
      // Consistency is computed over valid scores only; make failures visible.
      console.log(` Échecs: ${stats.failedCount}/${scores.length}`);
    }
    console.log(` Consistance: ${isConsistent(stats) ? '✅ BONNE' : '❌ PROBLÉMATIQUE'}\n`);
    console.log(' ─────────────────────────────────────────────────────\n');
  }

  // Global summary across all cases.
  console.log('🎯 ANALYSE FINALE DE CONSISTANCE:');
  console.log('==================================\n');

  const caseEntries = Object.entries(allResults);
  const totalCases = caseEntries.length;
  let totalConsistentCases = 0;

  for (const [name, stats] of caseEntries) {
    const consistent = isConsistent(stats);
    if (consistent) {
      totalConsistentCases++;
    }
    console.log(`${consistent ? '✅' : '❌'} ${name}:`);
    console.log(` Consistance: ${stats.consistency}% (${stats.inRangeCount}/${stats.validCount})`);
    console.log(` Variance: ${stats.variance} points`);
    console.log(` Moyenne: ${stats.average}\n`);
  }

  // Keep the ratio numeric for the comparisons; format only for display.
  const globalConsistency = (totalConsistentCases / totalCases) * 100;
  console.log(`🎯 CONSISTANCE GLOBALE: ${globalConsistency.toFixed(1)}%`);
  console.log(` Cas consistants: ${totalConsistentCases}/${totalCases}`);

  if (globalConsistency >= 90) {
    console.log('\n🎉 SYSTÈME TRÈS FIABLE!');
    console.log('✅ Scoring IA consistant et prévisible');
  } else if (globalConsistency >= 70) {
    console.log('\n✅ SYSTÈME ACCEPTABLE');
    console.log('⚠️ Quelques variations mais utilisable');
  } else {
    console.log('\n❌ SYSTÈME PROBLÉMATIQUE');
    console.log('⚠️ Trop de variations, scoring imprévisible');
  }

  // List every case flagged above, including those with no valid score at all.
  const problematicCases = caseEntries.filter(([, stats]) => !isConsistent(stats));
  if (problematicCases.length > 0) {
    console.log('\n🔍 CAS PROBLÉMATIQUES:');
    for (const [name, stats] of problematicCases) {
      console.log(`${name}: ${stats.consistency}% de consistance`);
      console.log(` Scores: [${stats.scores.slice(0, 5).join(', ')}...]`);
    }
  }

  return allResults;
}
// Start the run as soon as the module loads; any rejection is reported through console.error.
testRealConsistency().catch((error) => {
  console.error(error);
});