#pragma once

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <vector>

namespace aissia {

/**
 * @brief Callback for transcription results (low-level engine)
 *
 * NOTE(review): the callback signature was lost from this header; a single
 * "transcribed text" argument is assumed here -- confirm against the code
 * that installs and invokes the callback.
 */
using STTEngineCallback = std::function<void(const std::string&)>;

/**
 * @brief Interface for Speech-to-Text engines
 *
 * Pure-virtual contract that every STT backend implements.
 *
 * Implementations:
 * - WhisperAPIEngine: OpenAI Whisper API
 */
class ISTTEngine {
public:
    /// Virtual destructor so engines can be destroyed through this interface.
    virtual ~ISTTEngine() = default;

    /**
     * @brief Transcribe audio data
     * @param audioData PCM audio samples (16-bit, 16kHz, mono)
     * @return Transcribed text
     *
     * NOTE(review): the sample element type was lost from this header;
     * std::int16_t is inferred from the "16-bit" wording above -- confirm
     * against the engine implementations before merging.
     */
    virtual std::string transcribe(const std::vector<std::int16_t>& audioData) = 0;

    /**
     * @brief Transcribe audio file
     * @param filePath Path to audio file (wav, mp3, etc.)
     * @return Transcribed text
     */
    virtual std::string transcribeFile(const std::string& filePath) = 0;

    /**
     * @brief Set language for transcription
     * @param language ISO 639-1 code (e.g., "fr", "en")
     */
    virtual void setLanguage(const std::string& language) = 0;

    /**
     * @brief Check if engine is available
     * @return true when the engine reports itself as available
     */
    virtual bool isAvailable() const = 0;

    /**
     * @brief Get engine name
     * @return Name of the engine
     */
    virtual std::string getEngineName() const = 0;
};

/**
 * @brief Factory to create STT engine
 *
 * Only declarations live here; the accepted engine type strings and the
 * behaviour on failure are defined by the implementation file.
 */
class STTEngineFactory {
public:
    /**
     * @brief Legacy API (for backward compatibility)
     * @param apiKey API key handed to the created engine
     * @return Owning pointer to the created engine
     */
    static std::unique_ptr<ISTTEngine> create(const std::string& apiKey);

    /**
     * @brief New API with engine type and config
     * @param type Engine type identifier
     * @param modelPath Path to the model used by the engine
     * @param apiKey API key (optional, defaults to an empty string)
     * @return Owning pointer to the created engine
     */
    static std::unique_ptr<ISTTEngine> create(const std::string& type,
                                              const std::string& modelPath,
                                              const std::string& apiKey = "");
};

} // namespace aissia