/**
 * @file test_stt_engines.cpp
 * @brief Manual test program for all 4 STT engines
 *
 * Tests each STT engine with the same audio file and compares results.
 *
 * Usage: ./test_stt_engines <audio_file>
 *
 * Exit status: 0 when at least one engine produced a non-empty transcription,
 * 1 otherwise (including when the audio file argument is missing).
 */

#include "../../src/shared/audio/ISTTEngine.hpp"
#include "../../src/shared/audio/PocketSphinxEngine.hpp"
#include "../../src/shared/audio/VoskSTTEngine.hpp"
#include "../../src/shared/audio/WhisperCppEngine.hpp"
#include "../../src/shared/audio/WhisperAPIEngine.hpp"

#include <chrono>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <spdlog/sinks/stdout_color_sinks.h>
#include <spdlog/spdlog.h>

using namespace aissia;

/**
 * @brief Outcome of running one STT engine against the audio file.
 *
 * Members carry default initializers so that a result returned from an early
 * exit (engine unavailable, exception) never holds indeterminate values.
 */
struct TestResult {
    std::string engineName;      ///< Name reported by the engine.
    bool available = false;      ///< Value of ISTTEngine::isAvailable().
    std::string transcription;   ///< Text returned by transcribeFile(); may be empty.
    double durationMs = 0.0;     ///< Wall time of transcribeFile() in ms; 0 if it did not complete.
    std::string error;           ///< Empty on success, otherwise a human-readable reason.
};

/**
 * @brief Run a single engine on @p audioFile and collect the outcome.
 *
 * @param engine    Engine under test. A null pointer yields a result with
 *                  `available == false` and an explanatory error.
 * @param audioFile Path of the audio file handed to transcribeFile().
 * @return A TestResult; `error` is empty only when the engine was available,
 *         did not throw, and returned a non-empty transcription.
 *
 * Exceptions derived from std::exception thrown by transcribeFile() are
 * caught and reported through `error`.
 */
TestResult testEngine(ISTTEngine* engine, const std::string& audioFile) {
    TestResult result;

    if (engine == nullptr) {
        result.error = "Engine pointer is null";
        return result;
    }

    result.engineName = engine->getEngineName();
    result.available = engine->isAvailable();
    if (!result.available) {
        result.error = "Engine not available";
        return result;
    }

    // steady_clock is monotonic, which is what an elapsed-time measurement needs.
    const auto start = std::chrono::steady_clock::now();
    try {
        result.transcription = engine->transcribeFile(audioFile);
        const auto end = std::chrono::steady_clock::now();
        result.durationMs =
            std::chrono::duration<double, std::milli>(end - start).count();

        if (result.transcription.empty()) {
            result.error = "Empty transcription (file format not supported or processing failed)";
        }
    } catch (const std::exception& e) {
        result.error = std::string("Exception: ") + e.what();
    }
    return result;
}

/**
 * @brief Print one result as a boxed report on stdout.
 *
 * Three shapes are printed: "NOT AVAILABLE" (with the reason), "ERROR" (with
 * the error text), or "SUCCESS" (with the duration and the transcription).
 *
 * @param result Result to display.
 */
void printResult(const TestResult& result) {
    std::cout << "\n";
    std::cout << "┌─────────────────────────────────────────────────────────\n";
    std::cout << "│ Engine: " << result.engineName << "\n";
    std::cout << "├─────────────────────────────────────────────────────────\n";

    if (!result.available) {
        std::cout << "│ Status: ❌ NOT AVAILABLE\n";
        std::cout << "│ Reason: " << result.error << "\n";
    } else if (!result.error.empty()) {
        std::cout << "│ Status: ⚠️ ERROR\n";
        std::cout << "│ Error: " << result.error << "\n";
    } else {
        std::cout << "│ Status: ✅ SUCCESS\n";
        std::cout << "│ Duration: " << result.durationMs << " ms\n";
        std::cout << "│ Transcription: \"" << result.transcription << "\"\n";
    }

    std::cout << "└─────────────────────────────────────────────────────────\n";
}

/// Print a three-line double-ruled banner whose middle line is @p titleRow.
static void printBanner(const char* titleRow) {
    std::cout << "╔═══════════════════════════════════════════════════════════╗\n";
    std::cout << titleRow << "\n";
    std::cout << "╚═══════════════════════════════════════════════════════════╝\n";
}

/**
 * @brief Entry point: build the four engines, test each one, print a report.
 *
 * @param argc Argument count; at least 2 is required.
 * @param argv argv[1] is the audio file passed to every engine.
 * @return 0 if at least one transcription succeeded, 1 otherwise.
 */
int main(int argc, char* argv[]) {
    // Setup logging. The call creates the "test" logger; the returned handle
    // is not used again in this file.
    [[maybe_unused]] auto logger = spdlog::stdout_color_mt("test");
    spdlog::set_level(spdlog::level::info);

    // Check arguments
    if (argc < 2) {
        // argv[0] may be null when argc == 0; fall back to a fixed name.
        const char* program =
            (argc > 0 && argv[0] != nullptr) ? argv[0] : "test_stt_engines";
        std::cerr << "Usage: " << program << " <audio_file>\n";
        std::cerr << "Example: " << program << " test_audio.mp3\n";
        return 1;
    }

    const std::string audioFile = argv[1];

    std::cout << "\n";
    printBanner("║ STT ENGINES TEST - AISSIA Phase 7 ║");
    std::cout << "\n";
    std::cout << "Audio file: " << audioFile << "\n";
    std::cout << "\n";
    std::cout << "Testing 4 STT engines...\n";

    // Prepare engines: display name paired with the owning engine pointer.
    std::vector<std::pair<std::string, std::unique_ptr<ISTTEngine>>> engines;
    engines.reserve(4);

    // 1. PocketSphinx
    std::cout << "\n[1/4] Initializing PocketSphinx...\n";
    engines.emplace_back(
        "PocketSphinx",
        std::make_unique<PocketSphinxEngine>("/usr/share/pocketsphinx/model/en-us"));

    // 2. Vosk
    std::cout << "[2/4] Initializing Vosk...\n";
    engines.emplace_back(
        "Vosk",
        std::make_unique<VoskSTTEngine>("./models/vosk-model-small-fr-0.22"));

    // 3. Whisper.cpp
    std::cout << "[3/4] Initializing Whisper.cpp...\n";
    engines.emplace_back(
        "Whisper.cpp",
        std::make_unique<WhisperCppEngine>("./models/ggml-base.bin"));

    // 4. Whisper API (an unset OPENAI_API_KEY is passed on as an empty string)
    std::cout << "[4/4] Initializing Whisper API...\n";
    const char* apiKey = std::getenv("OPENAI_API_KEY");
    engines.emplace_back(
        "Whisper API",
        std::make_unique<WhisperAPIEngine>(apiKey ? apiKey : ""));

    // Test each engine
    std::vector<TestResult> results;
    results.reserve(engines.size());
    for (auto& [name, engine] : engines) {
        std::cout << "\n▶ Testing " << name << "...\n";
        results.push_back(testEngine(engine.get(), audioFile));
    }

    // Print results
    std::cout << "\n\n";
    printBanner("║ RESULTS ║");
    for (const auto& result : results) {
        printResult(result);
    }

    // Summary
    std::cout << "\n\n";
    printBanner("║ SUMMARY ║");
    std::cout << "\n";

    int available = 0;
    int successful = 0;
    for (const auto& result : results) {
        if (!result.available) {
            continue;
        }
        ++available;
        if (result.error.empty()) {
            ++successful;
        }
    }

    std::cout << "Total engines tested: " << results.size() << "\n";
    std::cout << "Engines available: " << available << "/" << results.size() << "\n";
    std::cout << "Successful transcriptions: " << successful << "/" << available << "\n";
    std::cout << "\n";

    if (successful > 0) {
        std::cout << "✅ STT system is working!\n";
        return 0;
    }
    if (available > 0) {
        std::cout << "⚠️ Some engines available but all failed (check audio file format)\n";
        return 1;
    }
    std::cout << "❌ No STT engines available (install models/libraries)\n";
    return 1;
}