/**
 * @file test_stt_live.cpp
 * @brief Live STT testing tool - runs one audio file through all 5 engines
 *
 * Usage: test_stt_live <audio_file.wav>
 *
 * Credentials are read from the process environment; a `.env` file in the
 * working directory (KEY=value lines) is loaded first and overrides any
 * variable that is already set.
 */

#include "src/shared/audio/ISTTEngine.hpp"

#include <spdlog/spdlog.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

using namespace aissia;

namespace {

/// Header size of a canonical PCM WAV file; used only when no "data" chunk can be located.
constexpr std::size_t kCanonicalWavHeaderSize = 44;

/// Half-open byte range [begin, end) of the PCM payload inside a file buffer.
struct PcmRange {
    std::size_t begin;
    std::size_t end;
};

/// Environment/configuration needed to exercise one key-based engine.
struct CloudEngineSpec {
    const char* heading;        ///< Line printed before the test, e.g. "[2/5] ...".
    const char* engineId;       ///< Identifier passed to STTEngineFactory::create.
    const char* apiKeyVar;      ///< Environment variable holding the API key.
    const char* regionVar;      ///< Environment variable holding the region, or nullptr.
    const char* missingConfig;  ///< Message printed when a required variable is unset.
};

/// Reads an unsigned 16-bit little-endian integer (RIFF is little-endian by definition).
std::uint16_t readLe16(const char* p) {
    const unsigned lo = static_cast<unsigned char>(p[0]);
    const unsigned hi = static_cast<unsigned char>(p[1]);
    return static_cast<std::uint16_t>(lo | (hi << 8));
}

/// Reads an unsigned 32-bit little-endian integer.
std::uint32_t readLe32(const char* p) {
    return static_cast<std::uint32_t>(readLe16(p)) |
           (static_cast<std::uint32_t>(readLe16(p + 2)) << 16);
}

/// True when the four bytes at @p p equal the four-character chunk id @p tag.
bool hasTag(const char* p, const char* tag) {
    return std::memcmp(p, tag, 4) == 0;
}

/**
 * Locates the sample payload of a WAV file held in memory.
 *
 * Walks the RIFF chunk list so that extra chunks before or after "data"
 * (LIST, fact, ...) are not decoded as audio. If the buffer is not a
 * RIFF/WAVE file or has no "data" chunk, falls back to "everything after the
 * first 44 bytes".
 */
PcmRange locatePcmData(const std::vector<char>& bytes) {
    const std::size_t total = bytes.size();
    PcmRange range{std::min(kCanonicalWavHeaderSize, total), total};

    if (total < 12 || !hasTag(bytes.data(), "RIFF") || !hasTag(bytes.data() + 8, "WAVE")) {
        return range;
    }

    std::size_t pos = 12;  // first chunk follows "RIFF" <size> "WAVE"
    while (total - pos >= 8) {
        const char* header = bytes.data() + pos;
        const std::size_t payload = pos + 8;
        const std::size_t available = total - payload;
        const std::size_t declared = readLe32(header + 4);

        if (hasTag(header, "fmt ") && declared >= 16 && available >= 16) {
            const std::uint16_t formatTag = readLe16(header + 8);
            const std::uint16_t bitsPerSample = readLe16(header + 22);
            // 1 = PCM, 0xFFFE = WAVE_FORMAT_EXTENSIBLE (may still wrap PCM).
            if (bitsPerSample != 16 || (formatTag != 1 && formatTag != 0xFFFE)) {
                spdlog::warn("WAV format tag {} with {} bits per sample is not 16-bit PCM; "
                             "decoded audio may be wrong",
                             formatTag, bitsPerSample);
            }
        }

        if (hasTag(header, "data")) {
            range.begin = payload;
            // Streamed/unfinalised files carry 0 or an oversized length: read to EOF then.
            range.end = (declared == 0 || declared > available) ? total : payload + declared;
            return range;
        }

        if (declared > available) {
            break;  // truncated or corrupt chunk; give up and use the fallback
        }
        // Chunks are padded to an even number of bytes.
        pos = payload + std::min(declared + (declared & 1u), available);
    }
    return range;
}

/// Prints the per-engine heading followed by a separator line.
void printHeading(const char* heading) {
    std::cout << heading << "\n";
    std::cout << "----------------------------\n";
}

/**
 * Creates one engine, transcribes with it and prints the outcome.
 *
 * @param unavailableMessage Printed when the factory returns nothing or the
 *                           engine reports it is not available.
 * @param makeEngine         Callable returning the engine handle.
 * @param transcribe         Callable taking the engine handle and returning the text.
 *
 * Exceptions derived from std::exception are reported and swallowed so that
 * one failing engine does not stop the remaining tests.
 */
template <typename MakeEngine, typename Transcribe>
void runEngine(const char* unavailableMessage, MakeEngine&& makeEngine, Transcribe&& transcribe) {
    try {
        auto engine = makeEngine();
        if (engine && engine->isAvailable()) {
            engine->setLanguage("fr");
            const std::string result = transcribe(engine);
            std::cout << "✅ Result: " << result << "\n\n";
        } else {
            std::cout << unavailableMessage << "\n\n";
        }
    } catch (const std::exception& e) {
        std::cout << "❌ Error: " << e.what() << "\n\n";
    }
}

}  // namespace

/**
 * Helper: load KEY=value pairs from a .env file into the process environment.
 *
 * Blank lines, lines starting with '#', lines without '=' and lines with an
 * empty key are skipped. A value wrapped in double quotes has the quotes
 * removed. Existing variables are overwritten.
 *
 * @param path File to read; a missing file only produces a warning.
 */
void loadEnv(const std::string& path = ".env") {
    std::ifstream file(path);
    if (!file.is_open()) {
        spdlog::warn("No .env file found at: {}", path);
        return;
    }

    std::string line;
    while (std::getline(file, line)) {
        // Files saved with CRLF endings would otherwise leave '\r' inside the value.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.empty() || line[0] == '#') {
            continue;
        }

        const auto pos = line.find('=');
        if (pos == std::string::npos || pos == 0) {
            continue;
        }

        const std::string key = line.substr(0, pos);
        std::string value = line.substr(pos + 1);

        // Remove surrounding quotes; size check keeps a lone '"' from matching itself.
        if (value.size() >= 2 && value.front() == '"' && value.back() == '"') {
            value = value.substr(1, value.size() - 2);
        }

#ifdef _WIN32
        _putenv_s(key.c_str(), value.c_str());
#else
        setenv(key.c_str(), value.c_str(), 1);
#endif
    }

    spdlog::info("Loaded environment from {}", path);
}

/**
 * Helper: read an environment variable.
 *
 * @return The variable's value, or an empty string when it is not set.
 */
std::string getEnvVar(const std::string& name) {
    const char* val = std::getenv(name.c_str());
    return val ? std::string(val) : "";
}

/**
 * Helper: load a WAV file as normalised float samples.
 *
 * Simplified: samples are decoded as 16-bit little-endian PCM and scaled to
 * [-1.0, 1.0); channel count and sample rate are not inspected.
 *
 * @param path WAV file to read.
 * @return Decoded samples, or an empty vector when the file cannot be opened
 *         or holds no sample data.
 */
std::vector<float> loadWavFile(const std::string& path) {
    std::ifstream file(path, std::ios::binary);
    if (!file.is_open()) {
        spdlog::error("Failed to open audio file: {}", path);
        return {};
    }

    const std::vector<char> bytes((std::istreambuf_iterator<char>(file)),
                                  std::istreambuf_iterator<char>());
    const PcmRange pcm = locatePcmData(bytes);

    std::vector<float> audioData;
    audioData.reserve((pcm.end - pcm.begin) / 2);
    // A trailing odd byte cannot form a sample and is ignored.
    for (std::size_t i = pcm.begin; i + 2 <= pcm.end; i += 2) {
        const auto sample = static_cast<std::int16_t>(readLe16(bytes.data() + i));
        audioData.push_back(static_cast<float>(sample) / 32768.0f);
    }

    spdlog::info("Loaded {} samples from {}", audioData.size(), path);
    return audioData;
}

/**
 * Entry point: transcribes the WAV file given as argv[1] with every engine
 * and prints each result.
 *
 * @return 1 on missing argument or unreadable audio, 0 otherwise (individual
 *         engine failures are reported but do not change the exit code).
 */
int main(int argc, char* argv[]) {
    spdlog::set_level(spdlog::level::info);
    spdlog::info("=== AISSIA STT Live Test ===");

    // Load environment variables
    loadEnv();

    // Check command line
    if (argc < 2) {
        std::cout << "Usage: " << argv[0] << " <audio_file.wav>\n";
        std::cout << "\nAvailable engines:\n";
        std::cout << "  1. Whisper.cpp (local, requires models/ggml-base.bin)\n";
        std::cout << "  2. Whisper API (requires OPENAI_API_KEY)\n";
        std::cout << "  3. Google Speech (requires GOOGLE_API_KEY)\n";
        std::cout << "  4. Azure STT (requires AZURE_SPEECH_KEY + AZURE_SPEECH_REGION)\n";
        std::cout << "  5. Deepgram (requires DEEPGRAM_API_KEY)\n";
        return 1;
    }

    const std::string audioFile = argv[1];

    // Load audio
    const std::vector<float> audioData = loadWavFile(audioFile);
    if (audioData.empty()) {
        spdlog::error("Failed to load audio data");
        return 1;
    }

    // Test each engine
    std::cout << "\n========================================\n";
    std::cout << "Testing STT Engines\n";
    std::cout << "========================================\n\n";

    // 1. Whisper.cpp (local) - the only engine fed the decoded samples directly
    printHeading("[1/5] Whisper.cpp (local)");
    runEngine(
        "❌ Not available (model missing?)",
        [] { return STTEngineFactory::create("whisper_cpp", "models/ggml-base.bin"); },
        [&](auto& engine) { return engine->transcribe(audioData); });

    // 2-5. Key-based engines - each is handed the file path
    const CloudEngineSpec cloudEngines[] = {
        {"[2/5] OpenAI Whisper API", "whisper_api", "OPENAI_API_KEY", nullptr,
         "❌ OPENAI_API_KEY not set"},
        {"[3/5] Google Speech-to-Text", "google", "GOOGLE_API_KEY", nullptr,
         "❌ GOOGLE_API_KEY not set"},
        {"[4/5] Azure Speech-to-Text", "azure", "AZURE_SPEECH_KEY", "AZURE_SPEECH_REGION",
         "❌ AZURE_SPEECH_KEY or AZURE_SPEECH_REGION not set"},
        {"[5/5] Deepgram", "deepgram", "DEEPGRAM_API_KEY", nullptr,
         "❌ DEEPGRAM_API_KEY not set"},
    };

    for (const auto& spec : cloudEngines) {
        printHeading(spec.heading);

        const std::string apiKey = getEnvVar(spec.apiKeyVar);
        const std::string region = spec.regionVar ? getEnvVar(spec.regionVar) : std::string{};
        if (apiKey.empty() || (spec.regionVar && region.empty())) {
            std::cout << spec.missingConfig << "\n\n";
            continue;
        }

        runEngine(
            "❌ Not available",
            [&] { return STTEngineFactory::create(spec.engineId, region, apiKey); },
            [&](auto& engine) { return engine->transcribeFile(audioFile); });
    }

    std::cout << "========================================\n";
    std::cout << "Testing complete!\n";
    std::cout << "========================================\n";

    return 0;
}