aissia/test_stt_live.cpp

238 lines
8.0 KiB
C++

/**
* @file test_stt_live.cpp
* @brief Live STT testing tool - Test all 5 engines
*/
#include "src/shared/audio/ISTTEngine.hpp"
#include <spdlog/spdlog.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
using namespace aissia;
/**
 * @brief Load KEY=VALUE pairs from a dotenv-style file into the process environment.
 *
 * Parsing rules:
 *  - surrounding whitespace (including the '\r' left by CRLF line endings) is
 *    trimmed from the line, the key and the value;
 *  - blank lines, lines whose first non-blank character is '#', lines without
 *    '=' and lines with an empty key are skipped;
 *  - one pair of enclosing double quotes is removed from the value;
 *  - a variable that already exists in the environment is overwritten.
 *
 * @param path File to read. If it cannot be opened, a warning is logged and
 *             the environment is left untouched.
 */
void loadEnv(const std::string& path = ".env") {
    std::ifstream file(path);
    if (!file.is_open()) {
        spdlog::warn("No .env file found at: {}", path);
        return;
    }

    // Strip leading/trailing blanks; '\r' is included so files saved with
    // Windows line endings do not leak a carriage return into the values.
    const auto trim = [](const std::string& text) {
        static const char* const kBlank = " \t\r\n";
        const auto first = text.find_first_not_of(kBlank);
        if (first == std::string::npos) {
            return std::string();
        }
        const auto last = text.find_last_not_of(kBlank);
        return text.substr(first, last - first + 1);
    };

    std::string line;
    while (std::getline(file, line)) {
        const std::string entry = trim(line);
        if (entry.empty() || entry.front() == '#') {
            continue;
        }

        const auto pos = entry.find('=');
        if (pos == std::string::npos) {
            continue;
        }

        const std::string key = trim(entry.substr(0, pos));
        if (key.empty()) {
            continue;  // "=value" has no variable name to assign to
        }

        std::string value = trim(entry.substr(pos + 1));
        // Remove quotes. Require two characters so a lone '"' is not treated
        // as both the opening and the closing quote.
        if (value.size() >= 2 && value.front() == '"' && value.back() == '"') {
            value = value.substr(1, value.size() - 2);
        }

#ifdef _WIN32
        _putenv_s(key.c_str(), value.c_str());
#else
        setenv(key.c_str(), value.c_str(), 1);
#endif
    }
    spdlog::info("Loaded environment from {}", path);
}
/**
 * @brief Read an environment variable as a std::string.
 *
 * @param name Name of the variable to look up.
 * @return The variable's value, or an empty string when it is not set.
 */
std::string getEnvVar(const std::string& name) {
    const char* raw = std::getenv(name.c_str());
    if (raw == nullptr) {
        return std::string();
    }
    return std::string(raw);
}
// Helper: Load audio file as WAV (simplified - assumes 16-bit PCM)
std::vector<float> loadWavFile(const std::string& path) {
std::ifstream file(path, std::ios::binary);
if (!file.is_open()) {
spdlog::error("Failed to open audio file: {}", path);
return {};
}
// Skip WAV header (44 bytes)
file.seekg(44);
// Read 16-bit PCM samples
std::vector<int16_t> samples;
int16_t sample;
while (file.read(reinterpret_cast<char*>(&sample), sizeof(sample))) {
samples.push_back(sample);
}
// Convert to float [-1.0, 1.0]
std::vector<float> audioData;
audioData.reserve(samples.size());
for (int16_t s : samples) {
audioData.push_back(static_cast<float>(s) / 32768.0f);
}
spdlog::info("Loaded {} samples from {}", audioData.size(), path);
return audioData;
}
/// Print the title line and the separator that open each engine section.
static void printEngineHeader(const char* title) {
    std::cout << title << "\n";
    std::cout << "----------------------------\n";
}

/**
 * @brief Create one engine through STTEngineFactory and transcribe the audio file with it.
 *
 * Prints the transcription, "Not available" when the factory returns a null
 * engine or isAvailable() is false, or the message of any std::exception
 * thrown along the way.
 *
 * @param engineName First factory argument (engine identifier).
 * @param config     Second factory argument: empty for most engines, the
 *                   region for Azure.
 * @param apiKey     Third factory argument.
 * @param audioFile  Path handed to transcribeFile().
 */
static void runFileEngine(const char* engineName, const std::string& config,
                          const std::string& apiKey, const std::string& audioFile) {
    try {
        auto engine = STTEngineFactory::create(engineName, config, apiKey);
        if (engine && engine->isAvailable()) {
            engine->setLanguage("fr");
            std::string result = engine->transcribeFile(audioFile);
            std::cout << "✅ Result: " << result << "\n\n";
        } else {
            std::cout << "❌ Not available\n\n";
        }
    } catch (const std::exception& e) {
        std::cout << "❌ Error: " << e.what() << "\n\n";
    }
}

/**
 * @brief Test an engine whose only requirement is a single API key variable.
 *
 * @param title      Section title, e.g. "[2/5] OpenAI Whisper API".
 * @param engineName Engine identifier passed to the factory.
 * @param keyVar     Name of the environment variable holding the API key; the
 *                   engine is skipped with a message when it is empty or unset.
 * @param audioFile  Path of the audio file to transcribe.
 */
static void testKeyedEngine(const char* title, const char* engineName,
                            const char* keyVar, const std::string& audioFile) {
    printEngineHeader(title);
    const std::string apiKey = getEnvVar(keyVar);
    if (apiKey.empty()) {
        std::cout << "❌ " << keyVar << " not set\n\n";
        return;
    }
    runFileEngine(engineName, "", apiKey, audioFile);
}

/**
 * @brief Entry point: run the given audio file through every STT engine in turn.
 *
 * Loads ".env", decodes the WAV file named by argv[1], then tests the local
 * Whisper.cpp engine on the decoded samples and the four API engines on the
 * file path. A failing or unavailable engine is reported and does not stop
 * the remaining tests.
 *
 * @return 1 when no audio file is given or it yields no samples, 0 otherwise.
 */
int main(int argc, char* argv[]) {
    spdlog::set_level(spdlog::level::info);
    spdlog::info("=== AISSIA STT Live Test ===");

    // Load environment variables
    loadEnv();

    // Check command line
    if (argc < 2) {
        std::cout << "Usage: " << argv[0] << " <audio.wav>\n";
        std::cout << "\nAvailable engines:\n";
        std::cout << " 1. Whisper.cpp (local, requires models/ggml-base.bin)\n";
        std::cout << " 2. Whisper API (requires OPENAI_API_KEY)\n";
        std::cout << " 3. Google Speech (requires GOOGLE_API_KEY)\n";
        std::cout << " 4. Azure STT (requires AZURE_SPEECH_KEY + AZURE_SPEECH_REGION)\n";
        std::cout << " 5. Deepgram (requires DEEPGRAM_API_KEY)\n";
        return 1;
    }
    std::string audioFile = argv[1];

    // Load audio
    std::vector<float> audioData = loadWavFile(audioFile);
    if (audioData.empty()) {
        spdlog::error("Failed to load audio data");
        return 1;
    }

    // Test each engine
    std::cout << "\n========================================\n";
    std::cout << "Testing STT Engines\n";
    std::cout << "========================================\n\n";

    // 1. Whisper.cpp (local): the only engine fed the decoded samples rather
    //    than the file path, and created with the two-argument factory call.
    {
        printEngineHeader("[1/5] Whisper.cpp (local)");
        try {
            auto engine = STTEngineFactory::create("whisper_cpp", "models/ggml-base.bin");
            if (engine && engine->isAvailable()) {
                engine->setLanguage("fr");
                std::string result = engine->transcribe(audioData);
                std::cout << "✅ Result: " << result << "\n\n";
            } else {
                std::cout << "❌ Not available (model missing?)\n\n";
            }
        } catch (const std::exception& e) {
            std::cout << "❌ Error: " << e.what() << "\n\n";
        }
    }

    // 2. Whisper API
    testKeyedEngine("[2/5] OpenAI Whisper API", "whisper_api", "OPENAI_API_KEY", audioFile);

    // 3. Google Speech
    testKeyedEngine("[3/5] Google Speech-to-Text", "google", "GOOGLE_API_KEY", audioFile);

    // 4. Azure Speech: needs a region in addition to the key, so it is
    //    handled separately from the single-key engines.
    {
        printEngineHeader("[4/5] Azure Speech-to-Text");
        const std::string apiKey = getEnvVar("AZURE_SPEECH_KEY");
        const std::string region = getEnvVar("AZURE_SPEECH_REGION");
        if (apiKey.empty() || region.empty()) {
            std::cout << "❌ AZURE_SPEECH_KEY or AZURE_SPEECH_REGION not set\n\n";
        } else {
            runFileEngine("azure", region, apiKey, audioFile);
        }
    }

    // 5. Deepgram
    testKeyedEngine("[5/5] Deepgram", "deepgram", "DEEPGRAM_API_KEY", audioFile);

    std::cout << "========================================\n";
    std::cout << "Testing complete!\n";
    std::cout << "========================================\n";
    return 0;
}