#include "WhisperClient.h"
#include "../audio/AudioBuffer.h"

#include <httplib.h>
#include <nlohmann/json.hpp>

#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <utility>

using json = nlohmann::json;

namespace secondvoice {

namespace {

/// Best-effort scope guard: deletes the file at `path` when destroyed.
/// A missing file is not an error; the result of std::remove is ignored.
class ScopedFileRemover {
public:
    explicit ScopedFileRemover(const std::string& path) : path_(path) {}
    ~ScopedFileRemover() { std::remove(path_.c_str()); }

    ScopedFileRemover(const ScopedFileRemover&) = delete;
    ScopedFileRemover& operator=(const ScopedFileRemover&) = delete;

private:
    std::string path_;
};

/// True when the requested response format yields a JSON body carrying a
/// "text" field. An empty format is treated as JSON.
bool expectsJsonBody(const std::string& response_format) {
    return response_format.empty() ||
           response_format == "json" ||
           response_format == "verbose_json";
}

} // namespace

/// @brief Creates a client that authenticates with the given API key.
/// @param api_key Key sent as a Bearer token on every request.
WhisperClient::WhisperClient(const std::string& api_key)
    : api_key_(api_key) {
}

/// @brief Transcribes audio by uploading it as a WAV file to
///        https://api.openai.com/v1/audio/transcriptions.
///
/// The samples are written to a temporary WAV file through AudioBuffer,
/// read back into memory, and posted as multipart form data. The temporary
/// file is removed before the function returns, on every path.
///
/// @param audio_data      Audio samples handed to AudioBuffer::addSamples
///                        (element type must match the declaration in
///                        WhisperClient.h).
/// @param sample_rate     Sample rate passed to AudioBuffer.
/// @param channels        Channel count passed to AudioBuffer.
/// @param model           Value of the "model" form field.
/// @param language        Value of the "language" form field.
/// @param temperature     Value of the "temperature" form field.
/// @param prompt          Optional "prompt" form field; omitted when empty.
/// @param response_format Value of the "response_format" form field.
/// @return The transcription, or std::nullopt on any failure (the reason is
///         logged to std::cerr).
std::optional<WhisperResponse> WhisperClient::transcribe(
    const std::vector<float>& audio_data,
    int sample_rate,
    int channels,
    const std::string& model,
    const std::string& language,
    float temperature,
    const std::string& prompt,
    const std::string& response_format) {
    // Save audio to temporary WAV file
    AudioBuffer buffer(sample_rate, channels);
    buffer.addSamples(audio_data);

    // NOTE(review): the path is fixed, so concurrent transcribe() calls
    // (in this or another process) would share the same file.
    const std::string temp_file = "/tmp/secondvoice_temp.wav";

    // Installed before writing so that a partially written file is cleaned
    // up as well.
    const ScopedFileRemover temp_file_cleanup(temp_file);

    if (!buffer.saveToWav(temp_file)) {
        std::cerr << "Failed to save temporary WAV file" << std::endl;
        return std::nullopt;
    }

    // Read WAV file
    std::string wav_data;
    {
        std::ifstream file(temp_file, std::ios::binary);
        if (!file.is_open()) {
            std::cerr << "Failed to open temporary WAV file" << std::endl;
            return std::nullopt;
        }

        std::ostringstream wav_stream;
        wav_stream << file.rdbuf();
        wav_data = wav_stream.str();
    }

    // An empty payload can only be rejected by the server; fail early
    // instead of spending a network round trip on it.
    if (wav_data.empty()) {
        std::cerr << "Temporary WAV file is empty" << std::endl;
        return std::nullopt;
    }

    // Make HTTP request
    httplib::Client client("https://api.openai.com");
    client.set_read_timeout(30, 0); // 30 seconds timeout

    httplib::Headers headers = {
        {"Authorization", "Bearer " + api_key_}
    };

    httplib::UploadFormDataItems items;
    // The WAV bytes are not needed locally any more: move, don't copy.
    items.push_back({"file", std::move(wav_data), "audio.wav", "audio/wav"});
    items.push_back({"model", model, "", ""});
    items.push_back({"language", language, "", ""});
    items.push_back({"temperature", std::to_string(temperature), "", ""});
    items.push_back({"response_format", response_format, "", ""});

    // Add prompt if provided
    if (!prompt.empty()) {
        items.push_back({"prompt", prompt, "", ""});
    }

    auto res = client.Post("/v1/audio/transcriptions", headers, items);

    if (!res) {
        std::cerr << "Whisper API request failed: "
                  << httplib::to_string(res.error()) << std::endl;
        return std::nullopt;
    }

    if (res->status != 200) {
        std::cerr << "Whisper API error " << res->status << ": "
                  << res->body << std::endl;
        return std::nullopt;
    }

    // Non-JSON formats (e.g. "text", "srt", "vtt") return the transcript as
    // the raw response body, so there is nothing to parse.
    if (!expectsJsonBody(response_format)) {
        WhisperResponse response;
        response.text = res->body;
        return response;
    }

    // Parse response
    try {
        const json response_json = json::parse(res->body);
        WhisperResponse response;
        // at() throws a json::exception when "text" is missing instead of
        // silently inserting a null value.
        response.text = response_json.at("text").get<std::string>();
        return response;
    } catch (const json::exception& e) {
        std::cerr << "Failed to parse Whisper response: " << e.what() << std::endl;
        return std::nullopt;
    }
}

} // namespace secondvoice