// Consistency test WITHOUT cache - completely different prompts for every round.
import { default as IAEngine } from './src/DRS/services/IAEngine.js';

// Pause between two consecutive rounds (ms), meant to avoid rate limiting and cached replies.
const ROUND_DELAY_MS = 5000;

/**
 * Runs a fixed series of comprehension and translation validations through
 * IAEngine and checks that each returned score falls inside the range expected
 * for that round (low for nonsense answers, high for correct ones).
 *
 * Every round uses a fresh IAEngine instance and a unique, timestamped
 * identifier in the options, with a pause between rounds, so that a previous
 * answer is not served again.
 *
 * A round that throws is recorded with `score: 'ERROR'` and counts as a failed
 * test in the final summary; it is never silently dropped from the totals.
 *
 * @returns {Promise<Array<{name: string, score: (number|string), expected: number[], inRange: boolean, provider?: string, error?: string}>>}
 *   One entry per round, in execution order.
 */
async function testNoCacheConsistency() {
  console.log('🔄 TEST DE COHÉRENCE SANS CACHE - Prompts uniques\n');
  console.log('================================================\n');

  // Specific cases; rounds carrying `content` are comprehension checks,
  // the others (with `original` / `translation`) are translation checks.
  const testRounds = [
    {
      name: 'Round 1 - Mauvaise réponse',
      content: 'The Eiffel Tower is located in Paris, France.',
      answer: 'Cats are purple and live on the moon',
      expectedRange: [0, 30],
    },
    {
      name: 'Round 2 - Bonne réponse',
      content: 'Tokyo is the capital city of Japan with over 13 million people.',
      answer: 'Tokyo is Japan\'s capital and has more than 13 million inhabitants',
      expectedRange: [70, 100],
    },
    {
      name: 'Round 3 - Mauvaise traduction',
      original: 'I love books',
      translation: 'Monkey banana computer',
      expectedRange: [0, 30],
    },
    {
      name: 'Round 4 - Bonne traduction',
      original: 'The weather is nice today',
      translation: 'Le temps est beau aujourd\'hui',
      expectedRange: [70, 100],
    },
    {
      name: 'Round 5 - Autre mauvaise réponse',
      content: 'New York City has five boroughs: Manhattan, Brooklyn, Queens, Bronx, and Staten Island.',
      answer: 'Fish swim in chocolate rivers',
      expectedRange: [0, 30],
    },
  ];

  const results = [];

  for (const [i, round] of testRounds.entries()) {
    console.log(`🧪 ${round.name}`);

    // Create a new instance for each round to avoid the cache.
    const engine = new IAEngine({
      defaultProvider: 'openai',
      fallbackProviders: ['deepseek'],
    });

    try {
      let result;

      if (round.content) {
        // Comprehension test
        console.log(` Texte: "${round.content}"`);
        console.log(` Réponse: "${round.answer}"`);
        result = await engine.validateComprehension(round.content, round.answer, {
          exerciseType: `comprehension-test-${Date.now()}-${i}`,
        });
      } else {
        // Translation test
        console.log(` Original: "${round.original}"`);
        console.log(` Traduction: "${round.translation}"`);
        result = await engine.validateTranslation(round.original, round.translation, {
          fromLang: 'en',
          toLang: 'fr',
          testId: `translation-test-${Date.now()}-${i}`,
        });
      }

      const [min, max] = round.expectedRange;
      const inRange = result.score >= min && result.score <= max;

      // Coerce so that a missing or non-string feedback neither prints
      // "undefined" nor throws and turns a valid score into an error.
      const feedbackPreview = String(result.feedback ?? '').substring(0, 80);

      console.log(` 📊 Score: ${result.score} (attendu: ${min}-${max})`);
      console.log(` ✅ Dans la plage: ${inRange ? 'OUI' : 'NON'}`);
      console.log(` 🤖 Provider: ${result.provider}`);
      console.log(` 💬 Feedback: ${feedbackPreview}...\n`);

      results.push({
        name: round.name,
        score: result.score,
        expected: round.expectedRange,
        inRange,
        provider: result.provider,
      });
    } catch (error) {
      console.log(` ❌ Erreur: ${error.message}\n`);
      results.push({
        name: round.name,
        score: 'ERROR',
        expected: round.expectedRange,
        inRange: false,
        error: error.message,
      });
    }

    // Significant delay to avoid rate limiting and cache; pointless after the
    // last round because no further request follows it.
    if (i < testRounds.length - 1) {
      await new Promise((resolve) => setTimeout(resolve, ROUND_DELAY_MS));
    }
  }

  // Final analysis
  console.log('📊 ANALYSE FINALE SANS CACHE:');
  console.log('============================\n');

  const validResults = results.filter((r) => typeof r.score === 'number');
  const erroredResults = results.filter((r) => typeof r.score !== 'number');
  const badAnswerResults = validResults.filter((r) => r.expected[1] <= 30); // Bad answers
  const goodAnswerResults = validResults.filter((r) => r.expected[0] >= 70); // Good answers

  console.log('🔴 Mauvaises réponses (devraient avoir <30 points):');
  for (const r of badAnswerResults) {
    console.log(` ${r.inRange ? '✅' : '❌'} ${r.name}: ${r.score} points`);
  }

  console.log('\n🟢 Bonnes réponses (devraient avoir >70 points):');
  for (const r of goodAnswerResults) {
    console.log(` ${r.inRange ? '✅' : '❌'} ${r.name}: ${r.score} points`);
  }

  if (erroredResults.length > 0) {
    console.log('\n⚠️ Tests en erreur (comptés comme échecs):');
    for (const r of erroredResults) {
      console.log(` ❌ ${r.name}: ${r.error ?? r.score}`);
    }
  }

  // Count every round: an errored or non-numeric result has inRange === false
  // and must lower the pass rate instead of vanishing from the denominator
  // (otherwise "all rounds failed" would be reported as 0/0 -> perfect).
  const passedTests = results.filter((r) => r.inRange).length;
  const totalTests = results.length;
  const successPercent = totalTests === 0 ? 0 : Math.round((passedTests / totalTests) * 100);

  console.log(`\n🎯 RÉSULTAT FINAL:`);
  console.log(` Tests réussis: ${passedTests}/${totalTests} (${successPercent}%)`);

  if (totalTests > 0 && passedTests === totalTests) {
    console.log('\n🎉 SYSTÈME PARFAIT!');
    console.log('✅ Toutes les mauvaises réponses reçoivent des scores bas');
    console.log('✅ Toutes les bonnes réponses reçoivent des scores élevés');
    console.log('✅ Le scoring IA fonctionne correctement');
  } else if (totalTests > 0 && passedTests >= totalTests * 0.8) {
    console.log('\n✅ SYSTÈME ACCEPTABLE');
    console.log('La plupart des tests passent, système utilisable');
  } else {
    console.log('\n❌ SYSTÈME PROBLÉMATIQUE');
    console.log('Trop de scores inappropriés, besoin d\'ajustements');
  }

  return results;
}

testNoCacheConsistency().catch(console.error);