Architecture Phase 7 STT implémentée mais bloquée par conflits de macros
entre GroveEngine (JsonDataNode.h) et spdlog/fmt.
## Nouveau contenu
### Interfaces & Services
- ISTTService.hpp: Interface service STT (modes passive/active, callbacks)
- STTService.{hpp,cpp}: Implémentation service STT avec factory pattern
- VoskSTTEngine.{hpp,cpp}: Engine STT local Vosk (~50MB model)
### Factory Pattern
- STTEngineFactory: Support multi-engines (Vosk, Whisper API, auto-select)
- Fallback automatique Vosk -> Whisper API
### Configuration
- config/voice.json: Config Phase 7 (passive_mode, active_mode, whisper_api)
- Support modèles Vosk locaux + fallback cloud
### Intégration
- VoiceService: Nouvelle méthode configureSTT(json) pour Phase 7
- main.cpp: Chargement config STT depuis voice.json
- CMakeLists.txt: Ajout fichiers + dépendance optionnelle Vosk
## Problème de Compilation
**Bloqué par conflits de macros**:
- JsonDataNode.h (GroveEngine) définit des macros qui polluent 'logger' et 'queue'
- Provoque des erreurs de compilation dans VoiceService.cpp et STTService.cpp
- Voir plans/PHASE7_COMPILATION_ISSUE.md pour diagnostic complet
## Fonctionnalités Implémentées
✅ Architecture STT complète (service layer + engines)
✅ Support Vosk local (modèles français)
✅ Factory pattern avec auto-selection
✅ Configuration JSON Phase 7
✅ Callbacks transcription/keywords
❌ Ne compile pas (macro conflicts)
## Prochaines Étapes
1. Résoudre les conflits de macros (corriger GroveEngine ou isoler les en-têtes dans un namespace)
2. Phase 7.2: PocketSphinxEngine (keyword spotting "Celuna")
3. Tests intégration STT
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
229 lines
6.4 KiB
C++
229 lines
6.4 KiB
C++
// CRITICAL ORDER: Include system headers before local headers to avoid macro conflicts
|
|
#include <nlohmann/json.hpp>
|
|
#include <cstdlib>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <queue>
|
|
|
|
// Include VoiceService.hpp BEFORE spdlog to avoid logger macro conflicts
|
|
#include "VoiceService.hpp"
|
|
#include "STTService.hpp"
|
|
|
|
// Include spdlog after VoiceService.hpp
|
|
#include <spdlog/sinks/stdout_color_sinks.h>
|
|
|
|
namespace aissia {
|
|
|
|
// Binds m_logger to the spdlog logger registered under "VoiceService".
// When no logger with that name exists yet, a colored stdout logger is
// created under the same name.
VoiceService::VoiceService() {
    auto registered = spdlog::get("VoiceService");
    m_logger = registered ? registered : spdlog::stdout_color_mt("VoiceService");
}
|
|
|
|
// Stores the IO handle, creates the TTS engine and subscribes to the voice topics.
//
// io: message bus handle. It may be null, in which case no topic is subscribed.
// Returns true in every case; an unavailable TTS engine is only reported as a warning.
bool VoiceService::initialize(grove::IIO* io) {
    m_io = io;

    // Build the TTS engine and push the currently stored rate/volume into it.
    m_ttsEngine = TTSEngineFactory::create();
    const bool ttsReady = m_ttsEngine && m_ttsEngine->isAvailable();
    if (!ttsReady) {
        m_logger->warn("TTS engine not available");
    } else {
        m_ttsEngine->setRate(m_ttsRate);
        m_ttsEngine->setVolume(m_ttsVolume);
        m_logger->info("TTS engine initialized");
    }

    // Topics consumed by processMessages(); all share one default-constructed config.
    if (m_io) {
        static const char* const kTopics[] = {"voice:speak", "voice:stop", "voice:listen"};
        grove::SubscriptionConfig config;
        for (const char* topic : kTopics) {
            m_io->subscribe(topic, config);
        }
    }

    m_logger->info("VoiceService initialized");
    return true;
}
|
|
|
|
void VoiceService::configureTTS(bool enabled, int rate, int volume) {
|
|
m_ttsEnabled = enabled;
|
|
m_ttsRate = rate;
|
|
m_ttsVolume = volume;
|
|
|
|
if (m_ttsEngine) {
|
|
m_ttsEngine->setRate(rate);
|
|
m_ttsEngine->setVolume(volume);
|
|
}
|
|
}
|
|
|
|
void VoiceService::configureSTT(bool enabled, const std::string& language,
|
|
const std::string& apiKey) {
|
|
m_sttEnabled = enabled;
|
|
m_language = language;
|
|
|
|
if (!apiKey.empty()) {
|
|
m_sttEngine = STTEngineFactory::create(apiKey);
|
|
if (m_sttEngine) {
|
|
m_sttEngine->setLanguage(language);
|
|
m_logger->info("STT engine configured");
|
|
}
|
|
}
|
|
}
|
|
|
|
void VoiceService::process() {
|
|
processMessages();
|
|
processSpeakQueue();
|
|
}
|
|
|
|
void VoiceService::processMessages() {
|
|
if (!m_io) return;
|
|
|
|
while (m_io->hasMessages() > 0) {
|
|
auto msg = m_io->pullMessage();
|
|
|
|
if (msg.topic == "voice:speak" && msg.data) {
|
|
handleSpeakRequest(*msg.data);
|
|
}
|
|
else if (msg.topic == "voice:stop") {
|
|
if (m_ttsEngine) {
|
|
m_ttsEngine->stop();
|
|
}
|
|
// Clear queue
|
|
while (!m_speakQueue.empty()) m_speakQueue.pop();
|
|
}
|
|
else if (msg.topic == "voice:listen" && m_sttEnabled && m_sttEngine) {
|
|
// STT would be handled here
|
|
// For now just log
|
|
m_logger->debug("STT listen requested");
|
|
}
|
|
}
|
|
}
|
|
|
|
// Queues the utterance described by a "voice:speak" message.
//
// data: node read for "text" (default "") and "priority" (default false).
// Requests with empty text are ignored. A priority request first empties the
// queue and stops the TTS engine, so its text becomes the only queued entry.
void VoiceService::handleSpeakRequest(const grove::IDataNode& data) {
    const std::string utterance = data.getString("text", "");
    const bool preempt = data.getBool("priority", false);

    if (utterance.empty()) {
        return;
    }

    if (preempt) {
        // Drop pending utterances and interrupt whatever is being spoken.
        while (!m_speakQueue.empty()) {
            m_speakQueue.pop();
        }
        if (m_ttsEngine) {
            m_ttsEngine->stop();
        }
    }

    m_speakQueue.push(utterance);
}
|
|
|
|
void VoiceService::processSpeakQueue() {
|
|
if (!m_ttsEnabled || !m_ttsEngine || m_speakQueue.empty()) return;
|
|
|
|
// Only speak if not currently speaking
|
|
if (!m_ttsEngine->isSpeaking() && !m_speakQueue.empty()) {
|
|
std::string text = m_speakQueue.front();
|
|
m_speakQueue.pop();
|
|
speak(text);
|
|
}
|
|
}
|
|
|
|
void VoiceService::speak(const std::string& text) {
|
|
if (!m_ttsEngine || !m_ttsEnabled) return;
|
|
|
|
// Publish speaking started
|
|
if (m_io) {
|
|
auto event = std::unique_ptr<grove::IDataNode>(
|
|
new grove::JsonDataNode("event")
|
|
);
|
|
event->setString("text", text.size() > 100 ? text.substr(0, 100) + "..." : text);
|
|
m_io->publish("voice:speaking_started", std::move(event));
|
|
}
|
|
|
|
m_ttsEngine->speak(text, true);
|
|
m_totalSpoken++;
|
|
|
|
m_logger->debug("Speaking");
|
|
}
|
|
|
|
// Phase 7: New STT configuration with full config support
|
|
// Configures the Phase 7 STT service from a JSON configuration.
//
// sttConfig: STT configuration. STT is enabled only when it contains an
//            "active_mode" object; inside that object "enabled" defaults to true.
//            The whole document is passed to the STTService constructor.
//
// Any STT service created by an earlier call is stopped and released first, so
// disabling STT through a new configuration really shuts the service down and a
// reconfiguration never leaves two services alive. If the new service fails to
// start, it is released and an error is logged.
void VoiceService::configureSTT(const nlohmann::json& sttConfig) {
    m_logger->info("[VoiceService] Configuring STT service (Phase 7)");

    // Extract enabled flag
    bool enabled = false;
    const auto activeMode = sttConfig.find("active_mode");
    if (activeMode != sttConfig.end()) {
        if (activeMode->is_object()) {
            enabled = activeMode->value("enabled", true);
        } else {
            // value() throws on non-object JSON; treat a malformed section as disabled.
            m_logger->warn("[VoiceService] 'active_mode' is not a JSON object, STT disabled");
        }
    }

    m_sttEnabled = enabled;

    // Tear down the service from a previous configuration before applying the new one.
    if (m_sttService) {
        m_sttService->stop();
        m_sttService.reset();
    }

    if (!enabled) {
        m_logger->info("[VoiceService] STT disabled in config");
        return;
    }

    // Create and start STT service
    m_sttService = std::make_unique<STTService>(sttConfig);

    if (!m_sttService->start()) {
        m_logger->error("[VoiceService] Failed to start STT service");
        m_sttService.reset();
        return;
    }

    m_logger->info("[VoiceService] STT service started");

    // Setup callbacks for transcription events
    // Note: For MVP Milestone 1, we don't start streaming yet
    // This will be implemented in Milestone 2 (passive mode)
}
|
|
|
|
// STT event handlers (Phase 7)
|
|
void VoiceService::handleKeyword(const std::string& keyword) {
|
|
m_logger->info("[VoiceService] Keyword detected");
|
|
|
|
// Publish keyword detection event
|
|
if (m_io) {
|
|
auto event = std::unique_ptr<grove::IDataNode>(
|
|
new grove::JsonDataNode("event")
|
|
);
|
|
event->setString("keyword", keyword);
|
|
event->setInt("timestamp", static_cast<int>(std::time(nullptr)));
|
|
m_io->publish("voice:keyword_detected", std::move(event));
|
|
}
|
|
|
|
// Auto-switch to active mode (Phase 7.2)
|
|
if (m_sttService) {
|
|
m_sttService->setMode(STTMode::ACTIVE);
|
|
}
|
|
}
|
|
|
|
void VoiceService::handleTranscription(const std::string& text, STTMode mode) {
|
|
m_logger->info("[VoiceService] Transcription received");
|
|
|
|
// Publish transcription event
|
|
if (m_io) {
|
|
std::string modeStr = (mode == STTMode::PASSIVE ? "passive" : "active");
|
|
auto event = std::unique_ptr<grove::IDataNode>(
|
|
new grove::JsonDataNode("event")
|
|
);
|
|
event->setString("text", text);
|
|
event->setString("mode", modeStr);
|
|
event->setInt("timestamp", static_cast<int>(std::time(nullptr)));
|
|
m_io->publish("voice:transcription", std::move(event));
|
|
}
|
|
}
|
|
|
|
void VoiceService::shutdown() {
|
|
if (m_ttsEngine) {
|
|
m_ttsEngine->stop();
|
|
}
|
|
|
|
if (m_sttService) {
|
|
m_sttService->stop();
|
|
}
|
|
|
|
m_logger->info("[VoiceService] Shutdown");
|
|
}
|
|
|
|
} // namespace aissia
|