aissia/src/services/VoiceService.cpp
StillHammer 3915424d75 feat(wip): Phase 7.1 STT Service Layer - Architecture complète (ne compile pas)
Architecture Phase 7 STT implémentée mais bloquée par conflits de macros
entre GroveEngine (JsonDataNode.h) et spdlog/fmt.

## Nouveau contenu

### Interfaces & Services
- ISTTService.hpp: Interface service STT (modes passive/active, callbacks)
- STTService.{hpp,cpp}: Implémentation service STT avec factory pattern
- VoskSTTEngine.{hpp,cpp}: Engine STT local Vosk (~50MB model)

### Factory Pattern
- STTEngineFactory: Support multi-engines (Vosk, Whisper API, auto-select)
- Fallback automatique Vosk -> Whisper API

### Configuration
- config/voice.json: Config Phase 7 (passive_mode, active_mode, whisper_api)
- Support modèles Vosk locaux + fallback cloud

### Intégration
- VoiceService: Nouvelle méthode configureSTT(json) pour Phase 7
- main.cpp: Chargement config STT depuis voice.json
- CMakeLists.txt: Ajout fichiers + dépendance optionnelle Vosk

## Problème de Compilation

**Bloqué par conflits de macros**:
- JsonDataNode.h (GroveEngine) définit des macros qui polluent 'logger' et 'queue'
- Cause erreurs dans VoiceService.cpp et STTService.cpp
- Voir plans/PHASE7_COMPILATION_ISSUE.md pour diagnostic complet

## Fonctionnalités Implémentées

- ✅ Architecture STT complète (service layer + engines)
- ✅ Support Vosk local (modèles français)
- ✅ Factory pattern avec auto-sélection
- ✅ Configuration JSON Phase 7
- ✅ Callbacks transcription/keywords
- ❌ Ne compile pas (conflits de macros)

## Prochaines Étapes

1. Résoudre les conflits de macros (corriger GroveEngine ou isoler les en-têtes via un namespace)
2. Phase 7.2: PocketSphinxEngine (keyword spotting "Celuna")
3. Tests intégration STT

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 09:01:26 +08:00

229 lines
6.4 KiB
C++

// CRITICAL ORDER: Include system headers before local headers to avoid macro conflicts
#include <nlohmann/json.hpp>
#include <cstdlib>
#include <memory>
#include <string>
#include <queue>
// Include VoiceService.hpp BEFORE spdlog to avoid logger macro conflicts
#include "VoiceService.hpp"
#include "STTService.hpp"
// Include spdlog after VoiceService.hpp
#include <spdlog/sinks/stdout_color_sinks.h>
namespace aissia {
/// Builds the service and binds it to the spdlog logger named "VoiceService".
///
/// The logger is looked up in the spdlog registry first; a stdout color logger
/// with that name is created only when the lookup yields nothing.
VoiceService::VoiceService() {
    auto registered = spdlog::get("VoiceService");
    m_logger = registered ? registered : spdlog::stdout_color_mt("VoiceService");
}
/// Attaches the service to the message bus and creates the TTS engine.
///
/// The engine comes from TTSEngineFactory::create(). When it exists and reports
/// itself available, the stored rate and volume are applied to it; otherwise a
/// warning is logged and initialization carries on.
///
/// @param io Bus handle stored in m_io. When null, no topics are subscribed.
/// @return Always true; an unavailable TTS engine is not treated as a failure.
bool VoiceService::initialize(grove::IIO* io) {
    m_io = io;

    m_ttsEngine = TTSEngineFactory::create();
    const bool ttsReady = m_ttsEngine && m_ttsEngine->isAvailable();
    if (!ttsReady) {
        m_logger->warn("TTS engine not available");
    } else {
        m_ttsEngine->setRate(m_ttsRate);
        m_ttsEngine->setVolume(m_ttsVolume);
        m_logger->info("TTS engine initialized");
    }

    // Topics consumed later by processMessages().
    if (m_io) {
        grove::SubscriptionConfig subscription;
        m_io->subscribe("voice:speak", subscription);
        m_io->subscribe("voice:stop", subscription);
        m_io->subscribe("voice:listen", subscription);
    }

    m_logger->info("VoiceService initialized");
    return true;
}
/// Stores the TTS settings and forwards rate/volume to the engine if one exists.
///
/// @param enabled Whether queued text may be spoken (checked by speak() and
///                processSpeakQueue()).
/// @param rate    Value handed to the engine's setRate().
/// @param volume  Value handed to the engine's setVolume().
void VoiceService::configureTTS(bool enabled, int rate, int volume) {
    m_ttsEnabled = enabled;
    m_ttsRate = rate;
    m_ttsVolume = volume;

    // Without an engine the values are only remembered.
    if (!m_ttsEngine) {
        return;
    }
    m_ttsEngine->setRate(rate);
    m_ttsEngine->setVolume(volume);
}
/// Configures the API-key based STT engine.
///
/// The enabled flag and language are always stored. An engine is requested from
/// STTEngineFactory only when @p apiKey is non-empty; with an empty key the
/// current m_sttEngine is left as it is.
///
/// @param enabled  Value stored in m_sttEnabled.
/// @param language Language stored in m_language and set on a newly created engine.
/// @param apiKey   Key passed to STTEngineFactory::create(); empty means "do not create".
void VoiceService::configureSTT(bool enabled, const std::string& language,
                                const std::string& apiKey) {
    m_sttEnabled = enabled;
    m_language = language;

    if (apiKey.empty()) {
        return;
    }

    m_sttEngine = STTEngineFactory::create(apiKey);
    if (!m_sttEngine) {
        return;
    }

    m_sttEngine->setLanguage(language);
    m_logger->info("STT engine configured");
}
/// Runs one service tick.
///
/// Pending bus messages are handled first so that speak/stop requests received
/// in this tick are reflected in the queue before the queue is serviced.
void VoiceService::process() {
processMessages();
processSpeakQueue();
}
/// Drains the message bus and dispatches each message by topic.
///
/// - "voice:speak"  : forwards the payload to handleSpeakRequest() when present.
/// - "voice:stop"   : stops the TTS engine (if any) and empties the speak queue.
/// - "voice:listen" : only logs, and only when STT is enabled and an engine exists.
///
/// Messages on any other topic are pulled and dropped. Does nothing without a bus.
void VoiceService::processMessages() {
    if (!m_io) {
        return;
    }

    while (m_io->hasMessages() > 0) {
        auto incoming = m_io->pullMessage();
        const auto& topic = incoming.topic;

        if (topic == "voice:speak") {
            // A speak request without a payload is ignored.
            if (incoming.data) {
                handleSpeakRequest(*incoming.data);
            }
            continue;
        }

        if (topic == "voice:stop") {
            if (m_ttsEngine) {
                m_ttsEngine->stop();
            }
            // Discard everything that was still waiting to be spoken.
            while (!m_speakQueue.empty()) {
                m_speakQueue.pop();
            }
            continue;
        }

        if (topic == "voice:listen" && m_sttEnabled && m_sttEngine) {
            // Placeholder: actual STT handling is not implemented here yet.
            m_logger->debug("STT listen requested");
        }
    }
}
/// Queues the text carried by a "voice:speak" payload.
///
/// Reads "text" (default "") and "priority" (default false) from @p data.
/// Empty text is ignored. A priority request first empties the queue and stops
/// the engine's current speech, so its text becomes the only queued entry.
///
/// @param data Payload of the speak request.
void VoiceService::handleSpeakRequest(const grove::IDataNode& data) {
    const std::string utterance = data.getString("text", "");
    const bool preempt = data.getBool("priority", false);

    if (utterance.empty()) {
        return;
    }

    if (preempt) {
        while (!m_speakQueue.empty()) {
            m_speakQueue.pop();
        }
        if (m_ttsEngine) {
            m_ttsEngine->stop();
        }
    }

    m_speakQueue.push(utterance);
}
/// Hands the oldest queued text to speak(), at most one entry per call.
///
/// Nothing happens when TTS is disabled, no engine exists, the queue is empty,
/// or the engine reports that it is still speaking.
void VoiceService::processSpeakQueue() {
    const bool hasWork = m_ttsEnabled && m_ttsEngine && !m_speakQueue.empty();
    if (!hasWork) {
        return;
    }

    // Wait for the current utterance to finish before starting the next one.
    if (m_ttsEngine->isSpeaking()) {
        return;
    }

    // Copy the entry out before popping; front() would dangle afterwards.
    std::string next = m_speakQueue.front();
    m_speakQueue.pop();
    speak(next);
}
void VoiceService::speak(const std::string& text) {
if (!m_ttsEngine || !m_ttsEnabled) return;
// Publish speaking started
if (m_io) {
auto event = std::unique_ptr<grove::IDataNode>(
new grove::JsonDataNode("event")
);
event->setString("text", text.size() > 100 ? text.substr(0, 100) + "..." : text);
m_io->publish("voice:speaking_started", std::move(event));
}
m_ttsEngine->speak(text, true);
m_totalSpoken++;
m_logger->debug("Speaking");
}
/// Phase 7: configures STT from a JSON document and starts the STT service.
///
/// STT counts as enabled only when @p sttConfig contains "active_mode"; within
/// it, "enabled" defaults to true when the key is missing. When disabled, the
/// function returns after recording the flag and leaves m_sttService untouched.
/// When enabled, a new STTService is built from the whole config and started;
/// if start() fails the service is released again (m_sttEnabled stays true).
///
/// @param sttConfig STT configuration; passed unchanged to the STTService constructor.
void VoiceService::configureSTT(const nlohmann::json& sttConfig) {
    m_logger->info("[VoiceService] Configuring STT service (Phase 7)");

    const bool sttRequested = sttConfig.contains("active_mode") &&
                              sttConfig["active_mode"].value("enabled", true);
    m_sttEnabled = sttRequested;
    if (!sttRequested) {
        m_logger->info("[VoiceService] STT disabled in config");
        return;
    }

    // The member is replaced before start() is called on the new instance.
    m_sttService = std::make_unique<STTService>(sttConfig);
    if (m_sttService->start()) {
        m_logger->info("[VoiceService] STT service started");
        // Transcription callbacks are not wired up yet: for MVP Milestone 1
        // streaming is not started. Planned for Milestone 2 (passive mode).
        return;
    }

    m_logger->error("[VoiceService] Failed to start STT service");
    m_sttService.reset();
}
// STT event handlers (Phase 7)
void VoiceService::handleKeyword(const std::string& keyword) {
m_logger->info("[VoiceService] Keyword detected");
// Publish keyword detection event
if (m_io) {
auto event = std::unique_ptr<grove::IDataNode>(
new grove::JsonDataNode("event")
);
event->setString("keyword", keyword);
event->setInt("timestamp", static_cast<int>(std::time(nullptr)));
m_io->publish("voice:keyword_detected", std::move(event));
}
// Auto-switch to active mode (Phase 7.2)
if (m_sttService) {
m_sttService->setMode(STTMode::ACTIVE);
}
}
void VoiceService::handleTranscription(const std::string& text, STTMode mode) {
m_logger->info("[VoiceService] Transcription received");
// Publish transcription event
if (m_io) {
std::string modeStr = (mode == STTMode::PASSIVE ? "passive" : "active");
auto event = std::unique_ptr<grove::IDataNode>(
new grove::JsonDataNode("event")
);
event->setString("text", text);
event->setString("mode", modeStr);
event->setInt("timestamp", static_cast<int>(std::time(nullptr)));
m_io->publish("voice:transcription", std::move(event));
}
}
/// Stops ongoing speech and the STT service, then logs the shutdown.
///
/// Both components are optional and skipped when absent. The engine and service
/// objects are only stopped here, not released.
void VoiceService::shutdown() {
    if (m_ttsEngine) m_ttsEngine->stop();
    if (m_sttService) m_sttService->stop();

    m_logger->info("[VoiceService] Shutdown");
}
} // namespace aissia