diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..06c7d7f
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "external/whisper.cpp"]
+	path = external/whisper.cpp
+	url = https://github.com/ggerganov/whisper.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9875850..e79c3a0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -137,6 +137,31 @@ else()
     message(STATUS "Vosk not found - STT will use fallback engines only")
 endif()
 
+# Optional: Link PocketSphinx if available (Phase 7 STT)
+find_library(POCKETSPHINX_LIBRARY pocketsphinx)
+find_library(SPHINXBASE_LIBRARY sphinxbase)
+if(POCKETSPHINX_LIBRARY AND SPHINXBASE_LIBRARY)
+    message(STATUS "PocketSphinx found: ${POCKETSPHINX_LIBRARY}")
+    target_link_libraries(AissiaAudio PUBLIC ${POCKETSPHINX_LIBRARY} ${SPHINXBASE_LIBRARY})
+    target_compile_definitions(AissiaAudio PRIVATE HAVE_POCKETSPHINX)
+    target_include_directories(AissiaAudio PRIVATE /usr/include/pocketsphinx /usr/include/sphinxbase)
+else()
+    message(STATUS "PocketSphinx not found - keyword spotting unavailable")
+endif()
+
+# Optional: Link Whisper.cpp if available (Phase 7 STT)
+find_library(WHISPER_LIBRARY whisper PATHS ${CMAKE_SOURCE_DIR}/external/whisper.cpp/build/src)
+if(WHISPER_LIBRARY)
+    message(STATUS "Whisper.cpp found: ${WHISPER_LIBRARY}")
+    target_link_libraries(AissiaAudio PUBLIC ${WHISPER_LIBRARY})
+    target_compile_definitions(AissiaAudio PRIVATE HAVE_WHISPER_CPP)
+    target_include_directories(AissiaAudio PRIVATE
+        ${CMAKE_SOURCE_DIR}/external/whisper.cpp/include
+        ${CMAKE_SOURCE_DIR}/external/whisper.cpp/ggml/include)
+else()
+    message(STATUS "Whisper.cpp not found - high-quality local STT unavailable")
+endif()
+
 # ============================================================================
 # Infrastructure Services Library (linked into main)
 # ============================================================================
@@ -333,3 +358,12 @@ option(BUILD_TESTING "Build integration tests" OFF)
 if(BUILD_TESTING)
     add_subdirectory(tests)
 endif()
+
+# Manual STT test executable
+add_executable(test_stt_engines
+    tests/manual/test_stt_engines.cpp
+)
+target_link_libraries(test_stt_engines
+    AissiaAudio
+    spdlog::spdlog
+)
diff --git a/external/whisper.cpp b/external/whisper.cpp
new file mode 160000
index 0000000..19ceec8
--- /dev/null
+++ b/external/whisper.cpp
@@ -0,0 +1 @@
+Subproject commit 19ceec8eac980403b714d603e5ca31653cd42a3f
diff --git a/tests/manual/test_stt_engines.cpp b/tests/manual/test_stt_engines.cpp
new file mode 100644
index 0000000..f68cc02
--- /dev/null
+++ b/tests/manual/test_stt_engines.cpp
@@ -0,0 +1,207 @@
+/**
+ * @file test_stt_engines.cpp
+ * @brief Manual test program for all 4 STT engines
+ *
+ * Tests each STT engine with the same audio file and compares results.
+ *
+ * Usage: ./test_stt_engines <audio_file>
+ */
+
+#include "../../src/shared/audio/ISTTEngine.hpp"
+#include "../../src/shared/audio/PocketSphinxEngine.hpp"
+#include "../../src/shared/audio/VoskSTTEngine.hpp"
+#include "../../src/shared/audio/WhisperCppEngine.hpp"
+#include "../../src/shared/audio/WhisperAPIEngine.hpp"
+#include <spdlog/spdlog.h>
+#include <spdlog/sinks/stdout_color_sinks.h>
+#include <chrono>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace aissia;
+
+/// Outcome of running one STT engine against the test audio file.
+struct TestResult {
+    std::string engineName;     ///< Name reported by the engine itself
+    bool available{false};      ///< Value returned by isAvailable()
+    std::string transcription;  ///< Text returned by transcribeFile()
+    double durationMs{0.0};     ///< Duration of a transcribeFile() call that returned
+    std::string error;          ///< Non-empty when the run did not succeed
+};
+
+/**
+ * @brief Runs one engine on @p audioFile and records the outcome.
+ *
+ * An unavailable engine, an empty transcription and a std::exception thrown
+ * by transcribeFile() are all reported through TestResult::error.
+ *
+ * @param engine    Engine under test; dereferenced unconditionally.
+ * @param audioFile Path passed unchanged to transcribeFile().
+ * @return The filled-in result; durationMs stays 0 unless the call returned.
+ */
+TestResult testEngine(ISTTEngine* engine, const std::string& audioFile) {
+    TestResult result;
+    result.engineName = engine->getEngineName();
+    result.available = engine->isAvailable();
+
+    if (!result.available) {
+        result.error = "Engine not available";
+        return result;
+    }
+
+    // steady_clock is monotonic, so the measured interval cannot be skewed by
+    // wall-clock adjustments made while the engine is running.
+    const auto start = std::chrono::steady_clock::now();
+
+    try {
+        result.transcription = engine->transcribeFile(audioFile);
+
+        const auto end = std::chrono::steady_clock::now();
+        result.durationMs =
+            std::chrono::duration<double, std::milli>(end - start).count();
+
+        if (result.transcription.empty()) {
+            result.error = "Empty transcription (file format not supported or processing failed)";
+        }
+    } catch (const std::exception& e) {
+        result.error = std::string("Exception: ") + e.what();
+    }
+
+    return result;
+}
+
+/// Prints one engine's result as a boxed block on stdout.
+void printResult(const TestResult& result) {
+    std::cout << "\n";
+    std::cout << "┌─────────────────────────────────────────────────────────\n";
+    std::cout << "│ Engine: " << result.engineName << "\n";
+    std::cout << "├─────────────────────────────────────────────────────────\n";
+
+    if (!result.available) {
+        std::cout << "│ Status: ❌ NOT AVAILABLE\n";
+        std::cout << "│ Reason: " << result.error << "\n";
+    } else if (!result.error.empty()) {
+        std::cout << "│ Status: ⚠️ ERROR\n";
+        std::cout << "│ Error: " << result.error << "\n";
+    } else {
+        std::cout << "│ Status: ✅ SUCCESS\n";
+        std::cout << "│ Duration: " << result.durationMs << " ms\n";
+        std::cout << "│ Transcription: \"" << result.transcription << "\"\n";
+    }
+
+    std::cout << "└─────────────────────────────────────────────────────────\n";
+}
+
+/**
+ * @brief Runs every STT engine on the audio file named by argv[1].
+ *
+ * @return 0 when at least one engine produced a transcription, 1 otherwise
+ *         (including when the audio-file argument is missing).
+ */
+int main(int argc, char* argv[]) {
+    // Setup logging
+    auto logger = spdlog::stdout_color_mt("test");
+    spdlog::set_level(spdlog::level::info);
+
+    // Check arguments
+    if (argc < 2) {
+        std::cerr << "Usage: " << argv[0] << " <audio_file>\n";
+        std::cerr << "Example: " << argv[0] << " test_audio.mp3\n";
+        return 1;
+    }
+
+    const std::string audioFile = argv[1];
+
+    std::cout << "\n";
+    std::cout << "╔═══════════════════════════════════════════════════════════╗\n";
+    std::cout << "║ STT ENGINES TEST - AISSIA Phase 7 ║\n";
+    std::cout << "╚═══════════════════════════════════════════════════════════╝\n";
+    std::cout << "\n";
+    std::cout << "Audio file: " << audioFile << "\n";
+    std::cout << "\n";
+    std::cout << "Testing 4 STT engines...\n";
+
+    // Prepare engines
+    // NOTE(review): concrete engine class names are assumed to match the
+    // included headers - confirm against src/shared/audio.
+    std::vector<std::pair<std::string, std::unique_ptr<ISTTEngine>>> engines;
+
+    // 1. PocketSphinx
+    std::cout << "\n[1/4] Initializing PocketSphinx...\n";
+    engines.emplace_back(
+        "PocketSphinx",
+        std::make_unique<PocketSphinxEngine>("/usr/share/pocketsphinx/model/en-us"));
+
+    // 2. Vosk
+    std::cout << "[2/4] Initializing Vosk...\n";
+    engines.emplace_back(
+        "Vosk",
+        std::make_unique<VoskSTTEngine>("./models/vosk-model-small-fr-0.22"));
+
+    // 3. Whisper.cpp
+    std::cout << "[3/4] Initializing Whisper.cpp...\n";
+    engines.emplace_back(
+        "Whisper.cpp",
+        std::make_unique<WhisperCppEngine>("./models/ggml-base.bin"));
+
+    // 4. Whisper API
+    std::cout << "[4/4] Initializing Whisper API...\n";
+    const char* apiKey = std::getenv("OPENAI_API_KEY");
+    engines.emplace_back(
+        "Whisper API",
+        std::make_unique<WhisperAPIEngine>(apiKey ? apiKey : ""));
+
+    // Test each engine
+    std::vector<TestResult> results;
+    results.reserve(engines.size());
+    for (auto& [name, engine] : engines) {
+        std::cout << "\n▶ Testing " << name << "...\n";
+        results.push_back(testEngine(engine.get(), audioFile));
+    }
+
+    // Print results
+    std::cout << "\n\n";
+    std::cout << "╔═══════════════════════════════════════════════════════════╗\n";
+    std::cout << "║ RESULTS ║\n";
+    std::cout << "╚═══════════════════════════════════════════════════════════╝\n";
+
+    for (const auto& result : results) {
+        printResult(result);
+    }
+
+    // Summary
+    std::cout << "\n\n";
+    std::cout << "╔═══════════════════════════════════════════════════════════╗\n";
+    std::cout << "║ SUMMARY ║\n";
+    std::cout << "╚═══════════════════════════════════════════════════════════╝\n";
+    std::cout << "\n";
+
+    int available = 0;
+    int successful = 0;
+
+    for (const auto& result : results) {
+        if (result.available) available++;
+        if (result.available && result.error.empty()) successful++;
+    }
+
+    std::cout << "Total engines tested: " << results.size() << "\n";
+    std::cout << "Engines available: " << available << "/" << results.size() << "\n";
+    std::cout << "Successful transcriptions: " << successful << "/" << available << "\n";
+    std::cout << "\n";
+
+    if (successful > 0) {
+        std::cout << "✅ STT system is working!\n";
+        return 0;
+    } else if (available > 0) {
+        std::cout << "⚠️ Some engines available but all failed (check audio file format)\n";
+        return 1;
+    } else {
+        std::cout << "❌ No STT engines available (install models/libraries)\n";
+        return 1;
+    }
+}