aissia/tests/manual/test_stt_engines.cpp
StillHammer 099e0d837e feat: Phase 7.2 - Integrate Whisper.cpp STT engine
Complete STT system with 4 engine options:
- WhisperCpp: High-quality local STT (ggml-base model, 147MB)
- WhisperAPI: Cloud STT via OpenAI
- PocketSphinx: Lightweight keyword spotting (optional)
- Vosk: Balanced local STT (optional)

Changes:
- Add whisper.cpp as git submodule
- Link whisper library with conditional compilation
- Create test_stt_engines manual test program
- Add PocketSphinx and Whisper.cpp optional dependencies

Test results:
✅ WhisperCpp: Compiled, model loaded successfully
✅ WhisperAPI: Compiled (requires API key)
⚠️ PocketSphinx: Compiled (model path needs config)
❌ Vosk: Library not available in Ubuntu repos

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 22:27:44 +08:00

183 lines
7.2 KiB
C++

/**
* @file test_stt_engines.cpp
* @brief Manual test program for all 4 STT engines
*
* Tests each STT engine with the same audio file and compares results.
*
* Usage: ./test_stt_engines <audio_file.mp3>
*/
#include "../../src/shared/audio/ISTTEngine.hpp"
#include "../../src/shared/audio/PocketSphinxEngine.hpp"
#include "../../src/shared/audio/VoskSTTEngine.hpp"
#include "../../src/shared/audio/WhisperCppEngine.hpp"
#include "../../src/shared/audio/WhisperAPIEngine.hpp"
#include <spdlog/spdlog.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <chrono>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
using namespace aissia;
/**
 * @brief Outcome of running one STT engine against the test audio file.
 *
 * Every scalar member has a default so that a result built on an early-exit
 * path (engine unavailable, exception during transcription) never carries an
 * indeterminate value when it is copied or moved into the results list.
 */
struct TestResult {
    std::string engineName;      ///< Name reported by the engine (getEngineName()).
    bool available = false;      ///< Whether the engine reported itself as available.
    std::string transcription;   ///< Text returned by transcribeFile(); empty on failure.
    double durationMs = 0.0;     ///< Wall time of the transcribeFile() call, in milliseconds.
    std::string error;           ///< Failure description; empty means the run succeeded.
};
/**
 * @brief Runs one STT engine against an audio file and records the outcome.
 *
 * @param engine    Engine under test. A null pointer produces an error result
 *                  (reported as not available) instead of being dereferenced.
 * @param audioFile Path passed unchanged to ISTTEngine::transcribeFile().
 * @return A TestResult holding the engine name and availability. When the
 *         engine is available, it also holds the transcription and the elapsed
 *         time of the transcribeFile() call; `error` is set when the engine is
 *         unavailable, the transcription is empty, or a std::exception is thrown.
 */
TestResult testEngine(ISTTEngine* engine, const std::string& audioFile) {
    // Value-initialize so `available` and `durationMs` are well-defined on
    // every return path, including the early exits below.
    TestResult result{};

    if (engine == nullptr) {
        result.engineName = "<null>";
        result.error = "Engine pointer is null";
        return result;
    }

    result.engineName = engine->getEngineName();
    result.available = engine->isAvailable();
    if (!result.available) {
        result.error = "Engine not available";
        return result;
    }

    // steady_clock is monotonic, so the measured interval cannot be skewed
    // by wall-clock adjustments during a long transcription.
    const auto start = std::chrono::steady_clock::now();
    try {
        result.transcription = engine->transcribeFile(audioFile);
        const auto end = std::chrono::steady_clock::now();
        result.durationMs = std::chrono::duration<double, std::milli>(end - start).count();

        if (result.transcription.empty()) {
            result.error = "Empty transcription (file format not supported or processing failed)";
        }
    } catch (const std::exception& e) {
        result.error = std::string("Exception: ") + e.what();
    }
    return result;
}
void printResult(const TestResult& result) {
std::cout << "\n";
std::cout << "┌─────────────────────────────────────────────────────────\n";
std::cout << "│ Engine: " << result.engineName << "\n";
std::cout << "├─────────────────────────────────────────────────────────\n";
if (!result.available) {
std::cout << "│ Status: ❌ NOT AVAILABLE\n";
std::cout << "│ Reason: " << result.error << "\n";
} else if (!result.error.empty()) {
std::cout << "│ Status: ⚠️ ERROR\n";
std::cout << "│ Error: " << result.error << "\n";
} else {
std::cout << "│ Status: ✅ SUCCESS\n";
std::cout << "│ Duration: " << result.durationMs << " ms\n";
std::cout << "│ Transcription: \"" << result.transcription << "\"\n";
}
std::cout << "└─────────────────────────────────────────────────────────\n";
}
int main(int argc, char* argv[]) {
// Setup logging
auto logger = spdlog::stdout_color_mt("test");
spdlog::set_level(spdlog::level::info);
// Check arguments
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <audio_file>\n";
std::cerr << "Example: " << argv[0] << " test_audio.mp3\n";
return 1;
}
std::string audioFile = argv[1];
std::cout << "\n";
std::cout << "╔═══════════════════════════════════════════════════════════╗\n";
std::cout << "║ STT ENGINES TEST - AISSIA Phase 7 ║\n";
std::cout << "╚═══════════════════════════════════════════════════════════╝\n";
std::cout << "\n";
std::cout << "Audio file: " << audioFile << "\n";
std::cout << "\n";
std::cout << "Testing 4 STT engines...\n";
// Prepare engines
std::vector<std::pair<std::string, std::unique_ptr<ISTTEngine>>> engines;
// 1. PocketSphinx
std::cout << "\n[1/4] Initializing PocketSphinx...\n";
engines.push_back({
"PocketSphinx",
std::make_unique<PocketSphinxEngine>("/usr/share/pocketsphinx/model/en-us")
});
// 2. Vosk
std::cout << "[2/4] Initializing Vosk...\n";
engines.push_back({
"Vosk",
std::make_unique<VoskSTTEngine>("./models/vosk-model-small-fr-0.22")
});
// 3. Whisper.cpp
std::cout << "[3/4] Initializing Whisper.cpp...\n";
engines.push_back({
"Whisper.cpp",
std::make_unique<WhisperCppEngine>("./models/ggml-base.bin")
});
// 4. Whisper API
std::cout << "[4/4] Initializing Whisper API...\n";
const char* apiKey = std::getenv("OPENAI_API_KEY");
engines.push_back({
"Whisper API",
std::make_unique<WhisperAPIEngine>(apiKey ? apiKey : "")
});
// Test each engine
std::vector<TestResult> results;
for (auto& [name, engine] : engines) {
std::cout << "\n▶ Testing " << name << "...\n";
results.push_back(testEngine(engine.get(), audioFile));
}
// Print results
std::cout << "\n\n";
std::cout << "╔═══════════════════════════════════════════════════════════╗\n";
std::cout << "║ RESULTS ║\n";
std::cout << "╚═══════════════════════════════════════════════════════════╝\n";
for (const auto& result : results) {
printResult(result);
}
// Summary
std::cout << "\n\n";
std::cout << "╔═══════════════════════════════════════════════════════════╗\n";
std::cout << "║ SUMMARY ║\n";
std::cout << "╚═══════════════════════════════════════════════════════════╝\n";
std::cout << "\n";
int available = 0;
int successful = 0;
for (const auto& result : results) {
if (result.available) available++;
if (result.available && result.error.empty()) successful++;
}
std::cout << "Total engines tested: " << results.size() << "\n";
std::cout << "Engines available: " << available << "/" << results.size() << "\n";
std::cout << "Successful transcriptions: " << successful << "/" << available << "\n";
std::cout << "\n";
if (successful > 0) {
std::cout << "✅ STT system is working!\n";
return 0;
} else if (available > 0) {
std::cout << "⚠️ Some engines available but all failed (check audio file format)\n";
return 1;
} else {
std::cout << "❌ No STT engines available (install models/libraries)\n";
return 1;
}
}