diff --git a/create_test_audio.py b/create_test_audio.py
new file mode 100644
index 0000000..17acbfc
--- /dev/null
+++ b/create_test_audio.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""Generate a French test audio WAV file for STT testing.
+
+Synthesizes a short sentence with gTTS, then converts the MP3 to a
+16 kHz / mono / 16-bit PCM WAV file.
+"""
+
+import os
+import sys
+import tempfile
+
+try:
+    from gtts import gTTS
+    from pydub import AudioSegment
+except ImportError as e:
+    print(f"Missing dependency: {e}")
+    print("\nInstall with: pip install gtts pydub")
+    print("Note: pydub also requires ffmpeg")
+    sys.exit(1)
+
+TEXT = "Bonjour, ceci est un test de reconnaissance vocale."
+OUTPUT_WAV = "test_audio.wav"
+
+
+def main() -> None:
+    """Synthesize TEXT in French and save it as OUTPUT_WAV."""
+    print(f"Generating audio: '{TEXT}'")
+
+    # A unique temp path keeps concurrent runs from clobbering each other.
+    fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
+    os.close(fd)
+    try:
+        gTTS(text=TEXT, lang="fr", slow=False).save(mp3_path)
+        print("✓ Generated MP3")
+
+        # Convert to WAV (16kHz, mono, 16-bit PCM)
+        audio = AudioSegment.from_mp3(mp3_path)
+        audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
+        audio.export(OUTPUT_WAV, format="wav")
+        print("✓ Converted to WAV (16kHz, mono, 16-bit)")
+    finally:
+        # Remove the intermediate MP3 even when synthesis or conversion fails.
+        os.remove(mp3_path)
+
+    print(f"✓ Saved as {OUTPUT_WAV}")
+    print(f"Duration: {len(audio) / 1000:.1f}s")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/create_test_audio_simple.py b/create_test_audio_simple.py
new file mode 100644
index 0000000..eeff8f2
--- /dev/null
+++ b/create_test_audio_simple.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""Generate a simple test audio WAV file using only the stdlib.
+
+Writes a 2-second 440 Hz sine tone as 16 kHz / mono / 16-bit PCM.
+"""
+
+import math
+import struct
+import wave
+
+# WAV parameters
+SAMPLE_RATE = 16000
+DURATION = 2  # seconds
+FREQUENCY = 440  # Hz (A4 note)
+OUTPUT_WAV = "test_audio.wav"
+
+
+def sine_samples(frequency, duration, sample_rate):
+    """Return a sine tone as a list of 16-bit PCM sample values."""
+    return [
+        int(math.sin(2.0 * math.pi * frequency * i / sample_rate) * 32767)
+        for i in range(int(sample_rate * duration))
+    ]
+
+
+def main() -> None:
+    """Write the sine tone to OUTPUT_WAV."""
+    samples = sine_samples(FREQUENCY, DURATION, SAMPLE_RATE)
+
+    with wave.open(OUTPUT_WAV, "w") as wav_file:
+        # Set parameters (1 channel, 2 bytes per sample, 16kHz)
+        wav_file.setnchannels(1)
+        wav_file.setsampwidth(2)
+        wav_file.setframerate(SAMPLE_RATE)
+
+        # Pack all samples in one call; one writeframes() per sample is
+        # needlessly slow. "<" forces little-endian, as WAV requires.
+        wav_file.writeframes(struct.pack(f"<{len(samples)}h", *samples))
+
+    print(f"✓ Saved as {OUTPUT_WAV} ({DURATION}s, {FREQUENCY} Hz sine)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test_stt_live.cpp b/test_stt_live.cpp
new file mode 100644
--- /dev/null
+++ b/test_stt_live.cpp
@@ -0,0 +1,261 @@
+// AISSIA STT live test: runs one audio file through each STT engine.
+//
+// NOTE(review): the project header path below is an assumption - confirm
+// where STTEngineFactory is declared.
+#include "src/shared/audio/ISTTEngine.hpp"
+
+#include <spdlog/spdlog.h>
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+using namespace aissia;
+
+// Helper: Trim surrounding whitespace, including the '\r' left by CRLF files
+std::string trim(const std::string& s) {
+    const char* whitespace = " \t\r\n";
+    const auto first = s.find_first_not_of(whitespace);
+    if (first == std::string::npos) return "";
+    const auto last = s.find_last_not_of(whitespace);
+    return s.substr(first, last - first + 1);
+}
+
+// Helper: Load .env file
+//
+// Exports every KEY=VALUE line of `path` into the process environment,
+// overwriting existing variables. Blank lines, '#' comments and lines
+// without '=' are skipped; one pair of surrounding double quotes is removed.
+void loadEnv(const std::string& path = ".env") {
+    std::ifstream file(path);
+    if (!file.is_open()) {
+        spdlog::warn("No .env file found at: {}", path);
+        return;
+    }
+
+    std::string rawLine;
+    while (std::getline(file, rawLine)) {
+        const std::string line = trim(rawLine);
+        if (line.empty() || line[0] == '#') continue;
+
+        const auto pos = line.find('=');
+        if (pos == std::string::npos) continue;
+
+        const std::string key = trim(line.substr(0, pos));
+        std::string value = trim(line.substr(pos + 1));
+        if (key.empty()) continue;  // "=value" is not a valid assignment
+
+        // Remove quotes; the size check keeps a lone '"' from matching itself
+        if (value.size() >= 2 && value.front() == '"' && value.back() == '"') {
+            value = value.substr(1, value.size() - 2);
+        }
+
+        #ifdef _WIN32
+        _putenv_s(key.c_str(), value.c_str());
+        #else
+        setenv(key.c_str(), value.c_str(), 1);
+        #endif
+    }
+    spdlog::info("Loaded environment from {}", path);
+}
+
+// Helper: Get API key from env (empty string when the variable is unset)
+std::string getEnvVar(const std::string& name) {
+    const char* val = std::getenv(name.c_str());
+    return val ? std::string(val) : "";
+}
+
+// Helper: Load audio file as WAV (assumes 16-bit PCM samples)
+//
+// Returns the samples scaled to [-1.0, 1.0), or an empty vector when the
+// file cannot be opened or is not a RIFF/WAVE file with a "data" chunk.
+std::vector<float> loadWavFile(const std::string& path) {
+    std::ifstream file(path, std::ios::binary);
+    if (!file.is_open()) {
+        spdlog::error("Failed to open audio file: {}", path);
+        return {};
+    }
+
+    // RIFF header: "RIFF" <size> "WAVE"
+    char riff[12];
+    if (!file.read(riff, sizeof(riff)) || std::memcmp(riff, "RIFF", 4) != 0 ||
+        std::memcmp(riff + 8, "WAVE", 4) != 0) {
+        spdlog::error("Not a RIFF/WAVE file: {}", path);
+        return {};
+    }
+
+    // Walk the chunk list to "data" rather than assuming a 44-byte header:
+    // extra chunks (e.g. LIST) may precede the samples.
+    uint32_t dataSize = 0;
+    bool foundData = false;
+    unsigned char chunk[8];
+    while (file.read(reinterpret_cast<char*>(chunk), sizeof(chunk))) {
+        // Chunk sizes are little-endian regardless of host byte order
+        const uint32_t chunkSize = static_cast<uint32_t>(chunk[4]) |
+                                   (static_cast<uint32_t>(chunk[5]) << 8) |
+                                   (static_cast<uint32_t>(chunk[6]) << 16) |
+                                   (static_cast<uint32_t>(chunk[7]) << 24);
+        if (std::memcmp(chunk, "data", 4) == 0) {
+            dataSize = chunkSize;
+            foundData = true;
+            break;
+        }
+        // Chunks are padded to an even number of bytes
+        file.seekg(static_cast<std::streamoff>(chunkSize) + (chunkSize & 1u), std::ios::cur);
+    }
+    if (!foundData) {
+        spdlog::error("No data chunk in WAV file: {}", path);
+        return {};
+    }
+
+    // Decode little-endian 16-bit samples and convert to float [-1.0, 1.0)
+    std::vector<float> audioData;
+    unsigned char bytes[2];
+    for (uint32_t remaining = dataSize;
+         remaining >= 2 && file.read(reinterpret_cast<char*>(bytes), sizeof(bytes));
+         remaining -= 2) {
+        const auto sample = static_cast<int16_t>(
+            static_cast<uint16_t>(bytes[0] | (bytes[1] << 8)));
+        audioData.push_back(static_cast<float>(sample) / 32768.0f);
+    }
+
+    spdlog::info("Loaded {} samples from {}", audioData.size(), path);
+    return audioData;
+}
+
+// Helper: Run one cloud engine on the audio file and print the outcome
+void testCloudEngine(const std::string& engineName,
+                     const std::string& config,
+                     const std::string& apiKey,
+                     const std::string& audioFile) {
+    try {
+        auto engine = STTEngineFactory::create(engineName, config, apiKey);
+        if (engine && engine->isAvailable()) {
+            engine->setLanguage("fr");
+            std::string result = engine->transcribeFile(audioFile);
+            std::cout << "✅ Result: " << result << "\n\n";
+        } else {
+            std::cout << "❌ Not available\n\n";
+        }
+    } catch (const std::exception& e) {
+        std::cout << "❌ Error: " << e.what() << "\n\n";
+    }
+}
+
+int main(int argc, char* argv[]) {
+    spdlog::set_level(spdlog::level::info);
+    spdlog::info("=== AISSIA STT Live Test ===");
+
+    // Load environment variables
+    loadEnv();
+
+    // Check command line
+    if (argc < 2) {
+        std::cout << "Usage: " << argv[0] << " <audio_file.wav>\n";
+        std::cout << "\nAvailable engines:\n";
+        std::cout << "  1. Whisper.cpp (local, requires models/ggml-base.bin)\n";
+        std::cout << "  2. Whisper API (requires OPENAI_API_KEY)\n";
+        std::cout << "  3. Google Speech (requires GOOGLE_API_KEY)\n";
+        std::cout << "  4. Azure STT (requires AZURE_SPEECH_KEY + AZURE_SPEECH_REGION)\n";
+        std::cout << "  5. Deepgram (requires DEEPGRAM_API_KEY)\n";
+        return 1;
+    }
+
+    std::string audioFile = argv[1];
+
+    // Load audio
+    std::vector<float> audioData = loadWavFile(audioFile);
+    if (audioData.empty()) {
+        spdlog::error("Failed to load audio data");
+        return 1;
+    }
+
+    // Test each engine
+    std::cout << "\n========================================\n";
+    std::cout << "Testing STT Engines\n";
+    std::cout << "========================================\n\n";
+
+    // 1. Whisper.cpp (local) - the only engine fed the decoded samples
+    {
+        std::cout << "[1/5] Whisper.cpp (local)\n";
+        std::cout << "----------------------------\n";
+
+        try {
+            auto engine = STTEngineFactory::create("whisper_cpp", "models/ggml-base.bin");
+            if (engine && engine->isAvailable()) {
+                engine->setLanguage("fr");
+                std::string result = engine->transcribe(audioData);
+                std::cout << "✅ Result: " << result << "\n\n";
+            } else {
+                std::cout << "❌ Not available (model missing?)\n\n";
+            }
+        } catch (const std::exception& e) {
+            std::cout << "❌ Error: " << e.what() << "\n\n";
+        }
+    }
+
+    // 2. Whisper API
+    {
+        std::cout << "[2/5] OpenAI Whisper API\n";
+        std::cout << "----------------------------\n";
+
+        std::string apiKey = getEnvVar("OPENAI_API_KEY");
+        if (apiKey.empty()) {
+            std::cout << "❌ OPENAI_API_KEY not set\n\n";
+        } else {
+            testCloudEngine("whisper_api", "", apiKey, audioFile);
+        }
+    }
+
+    // 3. Google Speech
+    {
+        std::cout << "[3/5] Google Speech-to-Text\n";
+        std::cout << "----------------------------\n";
+
+        std::string apiKey = getEnvVar("GOOGLE_API_KEY");
+        if (apiKey.empty()) {
+            std::cout << "❌ GOOGLE_API_KEY not set\n\n";
+        } else {
+            testCloudEngine("google", "", apiKey, audioFile);
+        }
+    }
+
+    // 4. Azure Speech
+    {
+        std::cout << "[4/5] Azure Speech-to-Text\n";
+        std::cout << "----------------------------\n";
+
+        std::string apiKey = getEnvVar("AZURE_SPEECH_KEY");
+        std::string region = getEnvVar("AZURE_SPEECH_REGION");
+
+        if (apiKey.empty() || region.empty()) {
+            std::cout << "❌ AZURE_SPEECH_KEY or AZURE_SPEECH_REGION not set\n\n";
+        } else {
+            testCloudEngine("azure", region, apiKey, audioFile);
+        }
+    }
+
+    // 5. Deepgram
+    {
+        std::cout << "[5/5] Deepgram\n";
+        std::cout << "----------------------------\n";
+
+        std::string apiKey = getEnvVar("DEEPGRAM_API_KEY");
+        if (apiKey.empty()) {
+            std::cout << "❌ DEEPGRAM_API_KEY not set\n\n";
+        } else {
+            testCloudEngine("deepgram", "", apiKey, audioFile);
+        }
+    }
+
+    std::cout << "========================================\n";
+    std::cout << "Testing complete!\n";
+    std::cout << "========================================\n";
+
+    return 0;
+}