feat: Major improvements - WinHTTP, gpt-4o-mini, Opus, sliding window
- Replace cpp-httplib with native WinHTTP for HTTPS support
- Switch from whisper-1 to gpt-4o-mini-transcribe model
- Use Opus/OGG encoding instead of WAV (~10x smaller files)
- Implement sliding window audio capture with overlap
- Add transcription deduplication for overlapping segments
- Add Voice Activity Detection (VAD) to filter silence/noise
- Filter Whisper hallucinations (Amara.org, etc.)
- Add UTF-8 console support for Chinese characters
- Add Chinese font loading in ImGui
- Make Claude responses concise (translation only, no explanations)
- Make window size, font size, and chunk duration/step configurable

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
089acbfff1
commit
fa8ea2907b
@ -56,7 +56,6 @@ target_include_directories(imgui_backends PUBLIC
|
|||||||
|
|
||||||
# Find packages (no more vcpkg imgui!)
|
# Find packages (no more vcpkg imgui!)
|
||||||
find_package(portaudio CONFIG REQUIRED)
|
find_package(portaudio CONFIG REQUIRED)
|
||||||
find_package(httplib CONFIG REQUIRED)
|
|
||||||
find_package(nlohmann_json CONFIG REQUIRED)
|
find_package(nlohmann_json CONFIG REQUIRED)
|
||||||
find_package(glfw3 CONFIG REQUIRED)
|
find_package(glfw3 CONFIG REQUIRED)
|
||||||
find_package(glad CONFIG REQUIRED)
|
find_package(glad CONFIG REQUIRED)
|
||||||
@ -74,6 +73,7 @@ set(SOURCES_UI
|
|||||||
# API clients
|
# API clients
|
||||||
src/api/WhisperClient.cpp
|
src/api/WhisperClient.cpp
|
||||||
src/api/ClaudeClient.cpp
|
src/api/ClaudeClient.cpp
|
||||||
|
src/api/WinHttpClient.cpp
|
||||||
# UI
|
# UI
|
||||||
src/ui/TranslationUI.cpp
|
src/ui/TranslationUI.cpp
|
||||||
# Utils
|
# Utils
|
||||||
@ -91,6 +91,7 @@ set(SOURCES_CONSOLE
|
|||||||
# API clients
|
# API clients
|
||||||
src/api/WhisperClient.cpp
|
src/api/WhisperClient.cpp
|
||||||
src/api/ClaudeClient.cpp
|
src/api/ClaudeClient.cpp
|
||||||
|
src/api/WinHttpClient.cpp
|
||||||
# Utils
|
# Utils
|
||||||
src/utils/Config.cpp
|
src/utils/Config.cpp
|
||||||
# Core - WAIT, Pipeline uses UI!
|
# Core - WAIT, Pipeline uses UI!
|
||||||
@ -116,7 +117,6 @@ if(MINGW AND NOT BUILD_SHARED_LIBS)
|
|||||||
# Static linking for MinGW - need to add Windows system libs for portaudio
|
# Static linking for MinGW - need to add Windows system libs for portaudio
|
||||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libportaudio.a
|
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libportaudio.a
|
||||||
httplib::httplib
|
|
||||||
nlohmann_json::nlohmann_json
|
nlohmann_json::nlohmann_json
|
||||||
imgui_backends
|
imgui_backends
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libglfw3.a
|
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libglfw3.a
|
||||||
@ -124,25 +124,27 @@ if(MINGW AND NOT BUILD_SHARED_LIBS)
|
|||||||
OpenGL::GL
|
OpenGL::GL
|
||||||
opus
|
opus
|
||||||
ogg
|
ogg
|
||||||
# Windows system libraries for portaudio
|
# Windows system libraries
|
||||||
winmm
|
winmm
|
||||||
setupapi
|
setupapi
|
||||||
|
ws2_32
|
||||||
|
winhttp
|
||||||
)
|
)
|
||||||
# Console version - NO UI libs
|
# Console version - NO UI libs
|
||||||
target_link_libraries(${PROJECT_NAME}_Console PRIVATE
|
target_link_libraries(${PROJECT_NAME}_Console PRIVATE
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libportaudio.a
|
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libportaudio.a
|
||||||
httplib::httplib
|
|
||||||
nlohmann_json::nlohmann_json
|
nlohmann_json::nlohmann_json
|
||||||
opus
|
opus
|
||||||
ogg
|
ogg
|
||||||
# Windows system libraries for portaudio
|
# Windows system libraries
|
||||||
winmm
|
winmm
|
||||||
setupapi
|
setupapi
|
||||||
|
ws2_32
|
||||||
|
winhttp
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||||
portaudio
|
portaudio_static
|
||||||
httplib::httplib
|
|
||||||
nlohmann_json::nlohmann_json
|
nlohmann_json::nlohmann_json
|
||||||
imgui_backends
|
imgui_backends
|
||||||
glfw
|
glfw
|
||||||
@ -153,8 +155,7 @@ else()
|
|||||||
)
|
)
|
||||||
# Console version - NO UI libs
|
# Console version - NO UI libs
|
||||||
target_link_libraries(${PROJECT_NAME}_Console PRIVATE
|
target_link_libraries(${PROJECT_NAME}_Console PRIVATE
|
||||||
portaudio
|
portaudio_static
|
||||||
httplib::httplib
|
|
||||||
nlohmann_json::nlohmann_json
|
nlohmann_json::nlohmann_json
|
||||||
opus
|
opus
|
||||||
ogg
|
ogg
|
||||||
@ -196,6 +197,12 @@ endif()
|
|||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.json
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.json
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/config.json COPYONLY)
|
${CMAKE_CURRENT_BINARY_DIR}/config.json COPYONLY)
|
||||||
|
|
||||||
|
# Copy .env file if it exists
|
||||||
|
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.env)
|
||||||
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/.env
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/.env COPYONLY)
|
||||||
|
endif()
|
||||||
|
|
||||||
# Install target
|
# Install target
|
||||||
install(TARGETS ${PROJECT_NAME} DESTINATION bin)
|
install(TARGETS ${PROJECT_NAME} DESTINATION bin)
|
||||||
install(FILES config.json DESTINATION bin)
|
install(FILES config.json DESTINATION bin)
|
||||||
|
|||||||
15
config.json
15
config.json
@ -3,26 +3,27 @@
|
|||||||
"sample_rate": 16000,
|
"sample_rate": 16000,
|
||||||
"channels": 1,
|
"channels": 1,
|
||||||
"chunk_duration_seconds": 10,
|
"chunk_duration_seconds": 10,
|
||||||
"format": "wav"
|
"chunk_step_seconds": 5,
|
||||||
|
"format": "ogg"
|
||||||
},
|
},
|
||||||
"whisper": {
|
"whisper": {
|
||||||
"model": "gpt-4o-mini-transcribe",
|
"model": "gpt-4o-mini-transcribe",
|
||||||
"language": "zh",
|
"language": "zh",
|
||||||
"temperature": 0.0,
|
"temperature": 0.0,
|
||||||
"prompt": "The following is a conversation in Mandarin Chinese about business, family, and daily life. Common names: Tingting, Alexis.",
|
"prompt": "The following is a conversation in Mandarin Chinese about business, family, and daily life. Common names: Tingting, Alexis.",
|
||||||
"stream": true,
|
"stream": false,
|
||||||
"response_format": "text"
|
"response_format": "text"
|
||||||
},
|
},
|
||||||
"claude": {
|
"claude": {
|
||||||
"model": "claude-haiku-4-20250514",
|
"model": "claude-3-5-haiku-20241022",
|
||||||
"max_tokens": 1024,
|
"max_tokens": 1024,
|
||||||
"temperature": 0.3,
|
"temperature": 0.3,
|
||||||
"system_prompt": "Tu es un traducteur professionnel chinois-français. Traduis le texte suivant de manière naturelle et contextuelle."
|
"system_prompt": "Tu es un traducteur chinois-français. Réponds UNIQUEMENT avec la traduction française, sans explications, notes, commentaires ou alternatives. Une seule phrase traduite, rien d'autre."
|
||||||
},
|
},
|
||||||
"ui": {
|
"ui": {
|
||||||
"window_width": 800,
|
"window_width": 1200,
|
||||||
"window_height": 600,
|
"window_height": 800,
|
||||||
"font_size": 16,
|
"font_size": 24,
|
||||||
"max_display_lines": 50
|
"max_display_lines": 50
|
||||||
},
|
},
|
||||||
"recording": {
|
"recording": {
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
@echo off
|
@echo off
|
||||||
|
setlocal enabledelayedexpansion
|
||||||
REM MinGW Setup Script for SecondVoice
|
REM MinGW Setup Script for SecondVoice
|
||||||
REM This installs a lightweight compiler instead of Visual Studio
|
REM This installs a lightweight compiler instead of Visual Studio
|
||||||
|
|
||||||
@ -7,18 +8,18 @@ echo SecondVoice - MinGW Setup
|
|||||||
echo ========================================
|
echo ========================================
|
||||||
echo.
|
echo.
|
||||||
echo This script will install:
|
echo This script will install:
|
||||||
echo - MinGW-w64 (GCC compiler for Windows)
|
echo - MinGW-w64 - GCC compiler for Windows
|
||||||
echo - CMake (build system)
|
echo - CMake - build system
|
||||||
echo - Ninja (build tool)
|
echo - Ninja - build tool
|
||||||
echo - Git (if not installed)
|
echo - Git if not installed
|
||||||
echo.
|
echo.
|
||||||
echo Total size: ~500MB (vs 10GB+ for Visual Studio!)
|
echo Total size: ~500MB vs 10GB+ for Visual Studio
|
||||||
echo.
|
echo.
|
||||||
pause
|
pause
|
||||||
|
|
||||||
REM Check if running as admin
|
REM Check if running as admin
|
||||||
net session >nul 2>&1
|
net session >nul 2>&1
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [WARNING] Not running as administrator
|
echo [WARNING] Not running as administrator
|
||||||
echo Some installations might fail. Recommended to run as admin.
|
echo Some installations might fail. Recommended to run as admin.
|
||||||
echo.
|
echo.
|
||||||
@ -27,7 +28,7 @@ if %errorlevel% neq 0 (
|
|||||||
|
|
||||||
REM Install chocolatey if not present
|
REM Install chocolatey if not present
|
||||||
where choco >nul 2>&1
|
where choco >nul 2>&1
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [INFO] Installing Chocolatey package manager...
|
echo [INFO] Installing Chocolatey package manager...
|
||||||
echo.
|
echo.
|
||||||
|
|
||||||
@ -37,7 +38,7 @@ if %errorlevel% neq 0 (
|
|||||||
set "PATH=%PATH%;C:\ProgramData\chocolatey\bin"
|
set "PATH=%PATH%;C:\ProgramData\chocolatey\bin"
|
||||||
|
|
||||||
where choco >nul 2>&1
|
where choco >nul 2>&1
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo.
|
echo.
|
||||||
echo ========================================
|
echo ========================================
|
||||||
echo [IMPORTANT] Chocolatey installed successfully!
|
echo [IMPORTANT] Chocolatey installed successfully!
|
||||||
@ -59,11 +60,11 @@ echo.
|
|||||||
|
|
||||||
REM Install MinGW-w64
|
REM Install MinGW-w64
|
||||||
where gcc >nul 2>&1
|
where gcc >nul 2>&1
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [INFO] Installing MinGW-w64 (GCC compiler)...
|
echo [INFO] Installing MinGW-w64...
|
||||||
choco install mingw -y
|
choco install mingw -y
|
||||||
|
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [ERROR] Failed to install MinGW
|
echo [ERROR] Failed to install MinGW
|
||||||
pause
|
pause
|
||||||
exit /b 1
|
exit /b 1
|
||||||
@ -79,11 +80,11 @@ if %errorlevel% neq 0 (
|
|||||||
|
|
||||||
REM Install CMake
|
REM Install CMake
|
||||||
where cmake >nul 2>&1
|
where cmake >nul 2>&1
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [INFO] Installing CMake...
|
echo [INFO] Installing CMake...
|
||||||
choco install cmake -y --installargs 'ADD_CMAKE_TO_PATH=System'
|
choco install cmake -y --installargs 'ADD_CMAKE_TO_PATH=System'
|
||||||
|
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [ERROR] Failed to install CMake
|
echo [ERROR] Failed to install CMake
|
||||||
pause
|
pause
|
||||||
exit /b 1
|
exit /b 1
|
||||||
@ -99,11 +100,11 @@ if %errorlevel% neq 0 (
|
|||||||
|
|
||||||
REM Install Ninja
|
REM Install Ninja
|
||||||
where ninja >nul 2>&1
|
where ninja >nul 2>&1
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [INFO] Installing Ninja...
|
echo [INFO] Installing Ninja...
|
||||||
choco install ninja -y
|
choco install ninja -y
|
||||||
|
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [ERROR] Failed to install Ninja
|
echo [ERROR] Failed to install Ninja
|
||||||
pause
|
pause
|
||||||
exit /b 1
|
exit /b 1
|
||||||
@ -119,11 +120,11 @@ if %errorlevel% neq 0 (
|
|||||||
|
|
||||||
REM Install Git (if not present)
|
REM Install Git (if not present)
|
||||||
where git >nul 2>&1
|
where git >nul 2>&1
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [INFO] Installing Git...
|
echo [INFO] Installing Git...
|
||||||
choco install git -y
|
choco install git -y
|
||||||
|
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [ERROR] Failed to install Git
|
echo [ERROR] Failed to install Git
|
||||||
pause
|
pause
|
||||||
exit /b 1
|
exit /b 1
|
||||||
@ -146,7 +147,7 @@ if not defined VCPKG_ROOT (
|
|||||||
cd vcpkg
|
cd vcpkg
|
||||||
call bootstrap-vcpkg.bat
|
call bootstrap-vcpkg.bat
|
||||||
|
|
||||||
if %errorlevel% neq 0 (
|
if !errorlevel! neq 0 (
|
||||||
echo [ERROR] Failed to bootstrap vcpkg
|
echo [ERROR] Failed to bootstrap vcpkg
|
||||||
pause
|
pause
|
||||||
exit /b 1
|
exit /b 1
|
||||||
@ -162,7 +163,7 @@ if not defined VCPKG_ROOT (
|
|||||||
echo [SUCCESS] VCPKG_ROOT set to C:\vcpkg
|
echo [SUCCESS] VCPKG_ROOT set to C:\vcpkg
|
||||||
echo.
|
echo.
|
||||||
) else (
|
) else (
|
||||||
echo [INFO] vcpkg already configured at: %VCPKG_ROOT%
|
echo [INFO] vcpkg already configured at: !VCPKG_ROOT!
|
||||||
echo.
|
echo.
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -175,16 +176,16 @@ where gcc
|
|||||||
where cmake
|
where cmake
|
||||||
where ninja
|
where ninja
|
||||||
where git
|
where git
|
||||||
echo VCPKG_ROOT=%VCPKG_ROOT%
|
echo VCPKG_ROOT=!VCPKG_ROOT!
|
||||||
echo.
|
echo.
|
||||||
echo ========================================
|
echo ========================================
|
||||||
echo Next Steps:
|
echo Next Steps:
|
||||||
echo ========================================
|
echo ========================================
|
||||||
echo 1. Close and reopen this terminal (to reload PATH)
|
echo 1. Close and reopen this terminal to reload PATH
|
||||||
echo 2. Run: build_mingw.bat --release
|
echo 2. Run: build_mingw.bat --release
|
||||||
echo 3. Your .exe will be in: build\mingw-release\SecondVoice.exe
|
echo 3. Your .exe will be in: build\mingw-release\SecondVoice.exe
|
||||||
echo.
|
echo.
|
||||||
echo Total installation size: ~500MB
|
echo Total installation size: ~500MB
|
||||||
echo (vs 10GB+ for Visual Studio!)
|
echo vs 10GB+ for Visual Studio!
|
||||||
echo.
|
echo.
|
||||||
pause
|
pause
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
#include "ClaudeClient.h"
|
#include "ClaudeClient.h"
|
||||||
#include "../mingw_compat.h"
|
#include "WinHttpClient.h"
|
||||||
#include <httplib.h>
|
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
@ -37,40 +36,37 @@ std::optional<ClaudeResponse> ClaudeClient::translate(
|
|||||||
|
|
||||||
std::string request_body = request_json.dump();
|
std::string request_body = request_json.dump();
|
||||||
|
|
||||||
// Make HTTP request
|
WinHttpClient client;
|
||||||
httplib::Client client("https://api.anthropic.com");
|
std::map<std::string, std::string> headers = {
|
||||||
client.set_read_timeout(15, 0); // 15 seconds timeout
|
|
||||||
|
|
||||||
httplib::Headers headers = {
|
|
||||||
{"x-api-key", api_key_},
|
{"x-api-key", api_key_},
|
||||||
{"anthropic-version", API_VERSION},
|
{"anthropic-version", API_VERSION},
|
||||||
{"content-type", "application/json"}
|
{"content-type", "application/json"}
|
||||||
};
|
};
|
||||||
|
|
||||||
auto res = client.Post("/v1/messages", headers, request_body, "application/json");
|
auto response = client.postJson("api.anthropic.com", "/v1/messages", request_body, headers);
|
||||||
|
|
||||||
if (!res) {
|
if (!response) {
|
||||||
std::cerr << "Claude API request failed: " << httplib::to_string(res.error()) << std::endl;
|
std::cerr << "Claude API request failed" << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (res->status != 200) {
|
if (response->statusCode != 200) {
|
||||||
std::cerr << "Claude API error " << res->status << ": " << res->body << std::endl;
|
std::cerr << "Claude API error " << response->statusCode << ": " << response->body << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse response
|
// Parse response
|
||||||
try {
|
try {
|
||||||
json response_json = json::parse(res->body);
|
json response_json = json::parse(response->body);
|
||||||
|
|
||||||
if (!response_json.contains("content") || !response_json["content"].is_array()) {
|
if (!response_json.contains("content") || !response_json["content"].is_array()) {
|
||||||
std::cerr << "Invalid Claude API response format" << std::endl;
|
std::cerr << "Invalid Claude API response format" << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
ClaudeResponse response;
|
ClaudeResponse result;
|
||||||
response.text = response_json["content"][0]["text"].get<std::string>();
|
result.text = response_json["content"][0]["text"].get<std::string>();
|
||||||
return response;
|
return result;
|
||||||
} catch (const json::exception& e) {
|
} catch (const json::exception& e) {
|
||||||
std::cerr << "Failed to parse Claude response: " << e.what() << std::endl;
|
std::cerr << "Failed to parse Claude response: " << e.what() << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
|
|||||||
@ -23,7 +23,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
std::string api_key_;
|
std::string api_key_;
|
||||||
static constexpr const char* API_URL = "https://api.anthropic.com/v1/messages";
|
static constexpr const char* API_URL = "https://api.anthropic.com/v1/messages";
|
||||||
static constexpr const char* MODEL = "claude-haiku-4-20250514";
|
static constexpr const char* MODEL = "claude-3-5-haiku-20241022";
|
||||||
static constexpr const char* API_VERSION = "2023-06-01";
|
static constexpr const char* API_VERSION = "2023-06-01";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,8 @@
|
|||||||
#include "WhisperClient.h"
|
#include "WhisperClient.h"
|
||||||
|
#include "WinHttpClient.h"
|
||||||
#include "../audio/AudioBuffer.h"
|
#include "../audio/AudioBuffer.h"
|
||||||
#include "../mingw_compat.h"
|
|
||||||
#include <httplib.h>
|
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
@ -25,70 +23,93 @@ std::optional<WhisperResponse> WhisperClient::transcribe(
|
|||||||
const std::string& prompt,
|
const std::string& prompt,
|
||||||
const std::string& response_format) {
|
const std::string& response_format) {
|
||||||
|
|
||||||
// Save audio to temporary Opus file
|
// Save audio to temporary Opus/OGG file (smaller than WAV)
|
||||||
AudioBuffer buffer(sample_rate, channels);
|
AudioBuffer buffer(sample_rate, channels);
|
||||||
buffer.addSamples(audio_data);
|
buffer.addSamples(audio_data);
|
||||||
|
|
||||||
std::string temp_file = "secondvoice_temp.opus";
|
std::string temp_file = "secondvoice_temp.ogg";
|
||||||
if (!buffer.saveToOpus(temp_file)) {
|
if (!buffer.saveToOpus(temp_file)) {
|
||||||
std::cerr << "Failed to save temporary Opus file" << std::endl;
|
std::cerr << "Failed to save temporary Opus file" << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read Opus file
|
std::cout << "Saved Opus file: " << temp_file << std::endl;
|
||||||
std::ifstream file(temp_file, std::ios::binary);
|
|
||||||
|
// Read file into memory
|
||||||
|
std::ifstream file(temp_file, std::ios::binary | std::ios::ate);
|
||||||
if (!file.is_open()) {
|
if (!file.is_open()) {
|
||||||
std::cerr << "Failed to open temporary Opus file" << std::endl;
|
std::cerr << "Failed to open temporary WAV file" << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostringstream opus_stream;
|
std::streamsize size = file.tellg();
|
||||||
opus_stream << file.rdbuf();
|
file.seekg(0, std::ios::beg);
|
||||||
std::string opus_data = opus_stream.str();
|
std::vector<char> fileData(size);
|
||||||
|
if (!file.read(fileData.data(), size)) {
|
||||||
|
std::cerr << "Failed to read temporary WAV file" << std::endl;
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
file.close();
|
file.close();
|
||||||
|
|
||||||
// Make HTTP request
|
// Prepare form fields
|
||||||
httplib::Client client("https://api.openai.com");
|
std::map<std::string, std::string> fields = {
|
||||||
client.set_read_timeout(30, 0); // 30 seconds timeout
|
{"model", model},
|
||||||
|
{"language", language},
|
||||||
|
{"response_format", response_format}
|
||||||
|
};
|
||||||
|
|
||||||
httplib::Headers headers = {
|
// Temperature only supported by whisper-1
|
||||||
|
if (model == "whisper-1") {
|
||||||
|
fields["temperature"] = std::to_string(temperature);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!prompt.empty()) {
|
||||||
|
fields["prompt"] = prompt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make request
|
||||||
|
WinHttpClient client;
|
||||||
|
std::map<std::string, std::string> headers = {
|
||||||
{"Authorization", "Bearer " + api_key_}
|
{"Authorization", "Bearer " + api_key_}
|
||||||
};
|
};
|
||||||
|
|
||||||
httplib::UploadFormDataItems items;
|
auto response = client.postMultipart(
|
||||||
items.push_back({"file", opus_data, "audio.opus", "audio/ogg"});
|
"api.openai.com",
|
||||||
items.push_back({"model", model, "", ""});
|
"/v1/audio/transcriptions",
|
||||||
items.push_back({"language", language, "", ""});
|
fields,
|
||||||
items.push_back({"temperature", std::to_string(temperature), "", ""});
|
"file",
|
||||||
items.push_back({"response_format", response_format, "", ""});
|
"audio.ogg",
|
||||||
|
"audio/ogg",
|
||||||
|
fileData,
|
||||||
|
headers
|
||||||
|
);
|
||||||
|
|
||||||
// Add prompt if provided
|
if (!response) {
|
||||||
if (!prompt.empty()) {
|
std::cerr << "Whisper API request failed" << std::endl;
|
||||||
items.push_back({"prompt", prompt, "", ""});
|
|
||||||
}
|
|
||||||
|
|
||||||
auto res = client.Post("/v1/audio/transcriptions", headers, items);
|
|
||||||
|
|
||||||
if (!res) {
|
|
||||||
std::cerr << "Whisper API request failed: " << httplib::to_string(res.error()) << std::endl;
|
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (res->status != 200) {
|
if (response->statusCode != 200) {
|
||||||
std::cerr << "Whisper API error " << res->status << ": " << res->body << std::endl;
|
std::cerr << "Whisper API error " << response->statusCode << ": " << response->body << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse response
|
// Parse response - for "text" format, response is plain text
|
||||||
|
// For "json" format, it's JSON
|
||||||
|
WhisperResponse result;
|
||||||
|
if (response_format == "text") {
|
||||||
|
result.text = response->body;
|
||||||
|
} else {
|
||||||
try {
|
try {
|
||||||
json response_json = json::parse(res->body);
|
json response_json = json::parse(response->body);
|
||||||
WhisperResponse response;
|
result.text = response_json["text"].get<std::string>();
|
||||||
response.text = response_json["text"].get<std::string>();
|
|
||||||
return response;
|
|
||||||
} catch (const json::exception& e) {
|
} catch (const json::exception& e) {
|
||||||
std::cerr << "Failed to parse Whisper response: " << e.what() << std::endl;
|
std::cerr << "Failed to parse Whisper response: " << e.what() << std::endl;
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace secondvoice
|
} // namespace secondvoice
|
||||||
|
|||||||
301
src/api/WinHttpClient.cpp
Normal file
301
src/api/WinHttpClient.cpp
Normal file
@ -0,0 +1,301 @@
|
|||||||
|
#include "WinHttpClient.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <random>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#pragma comment(lib, "winhttp.lib")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace secondvoice {
|
||||||
|
|
||||||
|
/// Opens the WinHTTP session handle (hSession_) that the request methods connect through.
/// If WinHttpOpen fails, the error code is logged and hSession_ is left as returned (null).
/// On non-Windows builds the constructor does nothing.
WinHttpClient::WinHttpClient() {
#ifdef _WIN32
    // User-agent string reported to servers by every request on this session.
    const wchar_t* const userAgent = L"SecondVoice/1.0";

    hSession_ = WinHttpOpen(userAgent,
                            WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
                            WINHTTP_NO_PROXY_NAME,
                            WINHTTP_NO_PROXY_BYPASS,
                            0);

    if (hSession_) {
        return;
    }
    std::cerr << "WinHttpOpen failed: " << GetLastError() << std::endl;
#endif
}
|
||||||
|
|
||||||
|
/// Releases the WinHTTP session handle, if one was successfully opened.
/// On non-Windows builds the destructor does nothing.
WinHttpClient::~WinHttpClient() {
#ifdef _WIN32
    // Nothing to release when WinHttpOpen failed in the constructor.
    if (!hSession_) {
        return;
    }
    WinHttpCloseHandle(hSession_);
#endif
}
|
||||||
|
|
||||||
|
/// Builds a multipart boundary token: the fixed prefix "----WebKitFormBoundary"
/// followed by 16 random lowercase hexadecimal digits.
///
/// @return The boundary string (without the leading "--" used when writing parts).
std::string WinHttpClient::generateBoundary() {
    static const char kHexDigits[] = "0123456789abcdef";
    const int kSuffixLength = 16;

    // A fresh engine is seeded from std::random_device on every call.
    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::uniform_int_distribution<> pickDigit(0, 15);

    std::string token("----WebKitFormBoundary");
    token.reserve(token.size() + kSuffixLength);

    for (int remaining = kSuffixLength; remaining > 0; --remaining) {
        token.push_back(kHexDigits[pickDigit(engine)]);
    }
    return token;
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
/// Converts a UTF-8 string to the UTF-16 std::wstring form required by the
/// wide-character WinHTTP calls in this file.
///
/// @param str UTF-8 input; conversion stops at the first NUL character.
/// @return The converted string, or an empty string when the input is empty
///         or MultiByteToWideChar reports a failure (the error code is logged).
static std::wstring toWideString(const std::string& str) {
    if (str.empty()) return L"";

    // With cbMultiByte == -1 the returned count INCLUDES the terminating NUL.
    // A return of 0 signals failure and must not be used to size the buffer.
    const int required = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, nullptr, 0);
    if (required <= 0) {
        std::cerr << "MultiByteToWideChar failed: " << GetLastError() << std::endl;
        return L"";
    }

    // Allocate room for the terminator too, so the API never writes past size().
    std::wstring result(static_cast<std::wstring::size_type>(required), L'\0');
    const int written = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, &result[0], required);
    if (written <= 0) {
        std::cerr << "MultiByteToWideChar failed: " << GetLastError() << std::endl;
        return L"";
    }

    // Drop the terminator that the API wrote as the last element.
    result.resize(static_cast<std::wstring::size_type>(written) - 1);
    return result;
}
#endif
|
||||||
|
|
||||||
|
std::optional<HttpResponse> WinHttpClient::postJson(
|
||||||
|
const std::string& host,
|
||||||
|
const std::string& path,
|
||||||
|
const std::string& jsonBody,
|
||||||
|
const std::map<std::string, std::string>& headers) {
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
if (!hSession_) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::wstring wHost = toWideString(host);
|
||||||
|
HINTERNET hConnect = WinHttpConnect(hSession_, wHost.c_str(), INTERNET_DEFAULT_HTTPS_PORT, 0);
|
||||||
|
if (!hConnect) {
|
||||||
|
std::cerr << "WinHttpConnect failed: " << GetLastError() << std::endl;
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::wstring wPath = toWideString(path);
|
||||||
|
HINTERNET hRequest = WinHttpOpenRequest(
|
||||||
|
hConnect,
|
||||||
|
L"POST",
|
||||||
|
wPath.c_str(),
|
||||||
|
nullptr,
|
||||||
|
WINHTTP_NO_REFERER,
|
||||||
|
WINHTTP_DEFAULT_ACCEPT_TYPES,
|
||||||
|
WINHTTP_FLAG_SECURE
|
||||||
|
);
|
||||||
|
if (!hRequest) {
|
||||||
|
std::cerr << "WinHttpOpenRequest failed: " << GetLastError() << std::endl;
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add headers
|
||||||
|
for (const auto& [key, value] : headers) {
|
||||||
|
std::wstring header = toWideString(key + ": " + value);
|
||||||
|
WinHttpAddRequestHeaders(hRequest, header.c_str(), -1, WINHTTP_ADDREQ_FLAG_ADD);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send request
|
||||||
|
BOOL result = WinHttpSendRequest(
|
||||||
|
hRequest,
|
||||||
|
WINHTTP_NO_ADDITIONAL_HEADERS,
|
||||||
|
0,
|
||||||
|
(LPVOID)jsonBody.c_str(),
|
||||||
|
jsonBody.length(),
|
||||||
|
jsonBody.length(),
|
||||||
|
0
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!result) {
|
||||||
|
std::cerr << "WinHttpSendRequest failed: " << GetLastError() << std::endl;
|
||||||
|
WinHttpCloseHandle(hRequest);
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = WinHttpReceiveResponse(hRequest, nullptr);
|
||||||
|
if (!result) {
|
||||||
|
std::cerr << "WinHttpReceiveResponse failed: " << GetLastError() << std::endl;
|
||||||
|
WinHttpCloseHandle(hRequest);
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get status code
|
||||||
|
DWORD statusCode = 0;
|
||||||
|
DWORD statusCodeSize = sizeof(statusCode);
|
||||||
|
WinHttpQueryHeaders(
|
||||||
|
hRequest,
|
||||||
|
WINHTTP_QUERY_STATUS_CODE | WINHTTP_QUERY_FLAG_NUMBER,
|
||||||
|
WINHTTP_HEADER_NAME_BY_INDEX,
|
||||||
|
&statusCode,
|
||||||
|
&statusCodeSize,
|
||||||
|
WINHTTP_NO_HEADER_INDEX
|
||||||
|
);
|
||||||
|
|
||||||
|
// Read response body
|
||||||
|
std::string responseBody;
|
||||||
|
DWORD bytesAvailable = 0;
|
||||||
|
do {
|
||||||
|
bytesAvailable = 0;
|
||||||
|
if (!WinHttpQueryDataAvailable(hRequest, &bytesAvailable)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (bytesAvailable == 0) break;
|
||||||
|
|
||||||
|
std::vector<char> buffer(bytesAvailable + 1);
|
||||||
|
DWORD bytesRead = 0;
|
||||||
|
if (WinHttpReadData(hRequest, buffer.data(), bytesAvailable, &bytesRead)) {
|
||||||
|
responseBody.append(buffer.data(), bytesRead);
|
||||||
|
}
|
||||||
|
} while (bytesAvailable > 0);
|
||||||
|
|
||||||
|
WinHttpCloseHandle(hRequest);
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
|
||||||
|
return HttpResponse{static_cast<int>(statusCode), responseBody};
|
||||||
|
#else
|
||||||
|
return std::nullopt;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<HttpResponse> WinHttpClient::postMultipart(
|
||||||
|
const std::string& host,
|
||||||
|
const std::string& path,
|
||||||
|
const std::map<std::string, std::string>& fields,
|
||||||
|
const std::string& fileFieldName,
|
||||||
|
const std::string& fileName,
|
||||||
|
const std::string& fileContentType,
|
||||||
|
const std::vector<char>& fileData,
|
||||||
|
const std::map<std::string, std::string>& headers) {
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
if (!hSession_) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string boundary = generateBoundary();
|
||||||
|
|
||||||
|
// Build multipart body
|
||||||
|
std::ostringstream body;
|
||||||
|
|
||||||
|
// Add fields
|
||||||
|
for (const auto& [key, value] : fields) {
|
||||||
|
body << "--" << boundary << "\r\n";
|
||||||
|
body << "Content-Disposition: form-data; name=\"" << key << "\"\r\n\r\n";
|
||||||
|
body << value << "\r\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add file
|
||||||
|
body << "--" << boundary << "\r\n";
|
||||||
|
body << "Content-Disposition: form-data; name=\"" << fileFieldName << "\"; filename=\"" << fileName << "\"\r\n";
|
||||||
|
body << "Content-Type: " << fileContentType << "\r\n\r\n";
|
||||||
|
|
||||||
|
std::string bodyPrefix = body.str();
|
||||||
|
std::string bodySuffix = "\r\n--" + boundary + "--\r\n";
|
||||||
|
|
||||||
|
// Combine all parts
|
||||||
|
std::vector<char> fullBody;
|
||||||
|
fullBody.insert(fullBody.end(), bodyPrefix.begin(), bodyPrefix.end());
|
||||||
|
fullBody.insert(fullBody.end(), fileData.begin(), fileData.end());
|
||||||
|
fullBody.insert(fullBody.end(), bodySuffix.begin(), bodySuffix.end());
|
||||||
|
|
||||||
|
std::wstring wHost = toWideString(host);
|
||||||
|
HINTERNET hConnect = WinHttpConnect(hSession_, wHost.c_str(), INTERNET_DEFAULT_HTTPS_PORT, 0);
|
||||||
|
if (!hConnect) {
|
||||||
|
std::cerr << "WinHttpConnect failed: " << GetLastError() << std::endl;
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::wstring wPath = toWideString(path);
|
||||||
|
HINTERNET hRequest = WinHttpOpenRequest(
|
||||||
|
hConnect,
|
||||||
|
L"POST",
|
||||||
|
wPath.c_str(),
|
||||||
|
nullptr,
|
||||||
|
WINHTTP_NO_REFERER,
|
||||||
|
WINHTTP_DEFAULT_ACCEPT_TYPES,
|
||||||
|
WINHTTP_FLAG_SECURE
|
||||||
|
);
|
||||||
|
if (!hRequest) {
|
||||||
|
std::cerr << "WinHttpOpenRequest failed: " << GetLastError() << std::endl;
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add headers
|
||||||
|
for (const auto& [key, value] : headers) {
|
||||||
|
std::wstring header = toWideString(key + ": " + value);
|
||||||
|
WinHttpAddRequestHeaders(hRequest, header.c_str(), -1, WINHTTP_ADDREQ_FLAG_ADD);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add content-type header
|
||||||
|
std::wstring contentType = toWideString("Content-Type: multipart/form-data; boundary=" + boundary);
|
||||||
|
WinHttpAddRequestHeaders(hRequest, contentType.c_str(), -1, WINHTTP_ADDREQ_FLAG_ADD);
|
||||||
|
|
||||||
|
// Send request
|
||||||
|
BOOL result = WinHttpSendRequest(
|
||||||
|
hRequest,
|
||||||
|
WINHTTP_NO_ADDITIONAL_HEADERS,
|
||||||
|
0,
|
||||||
|
fullBody.data(),
|
||||||
|
fullBody.size(),
|
||||||
|
fullBody.size(),
|
||||||
|
0
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!result) {
|
||||||
|
std::cerr << "WinHttpSendRequest failed: " << GetLastError() << std::endl;
|
||||||
|
WinHttpCloseHandle(hRequest);
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = WinHttpReceiveResponse(hRequest, nullptr);
|
||||||
|
if (!result) {
|
||||||
|
std::cerr << "WinHttpReceiveResponse failed: " << GetLastError() << std::endl;
|
||||||
|
WinHttpCloseHandle(hRequest);
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get status code
|
||||||
|
DWORD statusCode = 0;
|
||||||
|
DWORD statusCodeSize = sizeof(statusCode);
|
||||||
|
WinHttpQueryHeaders(
|
||||||
|
hRequest,
|
||||||
|
WINHTTP_QUERY_STATUS_CODE | WINHTTP_QUERY_FLAG_NUMBER,
|
||||||
|
WINHTTP_HEADER_NAME_BY_INDEX,
|
||||||
|
&statusCode,
|
||||||
|
&statusCodeSize,
|
||||||
|
WINHTTP_NO_HEADER_INDEX
|
||||||
|
);
|
||||||
|
|
||||||
|
// Read response body
|
||||||
|
std::string responseBody;
|
||||||
|
DWORD bytesAvailable = 0;
|
||||||
|
do {
|
||||||
|
bytesAvailable = 0;
|
||||||
|
if (!WinHttpQueryDataAvailable(hRequest, &bytesAvailable)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (bytesAvailable == 0) break;
|
||||||
|
|
||||||
|
std::vector<char> buffer(bytesAvailable + 1);
|
||||||
|
DWORD bytesRead = 0;
|
||||||
|
if (WinHttpReadData(hRequest, buffer.data(), bytesAvailable, &bytesRead)) {
|
||||||
|
responseBody.append(buffer.data(), bytesRead);
|
||||||
|
}
|
||||||
|
} while (bytesAvailable > 0);
|
||||||
|
|
||||||
|
WinHttpCloseHandle(hRequest);
|
||||||
|
WinHttpCloseHandle(hConnect);
|
||||||
|
|
||||||
|
return HttpResponse{static_cast<int>(statusCode), responseBody};
|
||||||
|
#else
|
||||||
|
return std::nullopt;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace secondvoice
|
||||||
52
src/api/WinHttpClient.h
Normal file
52
src/api/WinHttpClient.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#include <winhttp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
|
namespace secondvoice {

/// Result of a completed HTTP exchange.
struct HttpResponse {
    /// HTTP status code reported by the server; 0 until a response has been stored.
    int statusCode = 0;
    /// Raw response body bytes as received.
    std::string body;
};

/// Minimal HTTPS client built on the native WinHTTP API.
///
/// On non-Windows builds the session handle is compiled out of the class.
class WinHttpClient {
public:
    WinHttpClient();
    ~WinHttpClient();

    // The class holds a raw WinHTTP session handle and has a user-provided
    // destructor (defined in WinHttpClient.cpp, presumably releasing the
    // handle - confirm there). A copy would share that handle and risk closing
    // it twice, so copying is disabled.
    WinHttpClient(const WinHttpClient&) = delete;
    WinHttpClient& operator=(const WinHttpClient&) = delete;

    /// POST a JSON body to `host` + `path`.
    ///
    /// @param host     Server host name.
    /// @param path     Request path on that host.
    /// @param jsonBody Request payload.
    /// @param headers  Extra request headers (name -> value).
    /// @return The response, or std::nullopt on failure (presumably mirroring
    ///         postMultipart - verify against the implementation).
    std::optional<HttpResponse> postJson(
        const std::string& host,
        const std::string& path,
        const std::string& jsonBody,
        const std::map<std::string, std::string>& headers
    );

    /// POST a multipart/form-data request containing text fields plus one file part.
    ///
    /// The request is sent over HTTPS on the default HTTPS port. The multipart
    /// Content-Type header (with boundary) is added by this method, so callers
    /// must not supply their own.
    ///
    /// @param host            Server host name.
    /// @param path            Request path on that host.
    /// @param fields          Plain form fields, each emitted as its own part (name -> value).
    /// @param fileFieldName   Form field name of the file part.
    /// @param fileName        File name advertised in the part's Content-Disposition.
    /// @param fileContentType MIME type advertised for the file part.
    /// @param fileData        Raw bytes of the file.
    /// @param headers         Extra request headers (name -> value), each sent as "name: value".
    /// @return The response with whatever status code the server returned (the
    ///         status is not checked here), or std::nullopt if connecting,
    ///         opening, sending or receiving fails, and always on non-Windows builds.
    std::optional<HttpResponse> postMultipart(
        const std::string& host,
        const std::string& path,
        const std::map<std::string, std::string>& fields,
        const std::string& fileFieldName,
        const std::string& fileName,
        const std::string& fileContentType,
        const std::vector<char>& fileData,
        const std::map<std::string, std::string>& headers
    );

private:
#ifdef _WIN32
    /// WinHTTP session handle used to open every connection made by this client.
    HINTERNET hSession_ = nullptr;
#endif
    /// Produces the boundary string that delimits multipart parts.
    std::string generateBoundary();
};

} // namespace secondvoice
|
||||||
@ -3,10 +3,12 @@
|
|||||||
|
|
||||||
namespace secondvoice {
|
namespace secondvoice {
|
||||||
|
|
||||||
AudioCapture::AudioCapture(int sample_rate, int channels, int chunk_duration_seconds)
|
AudioCapture::AudioCapture(int sample_rate, int channels, int chunk_duration_seconds, int chunk_step_seconds)
|
||||||
: sample_rate_(sample_rate)
|
: sample_rate_(sample_rate)
|
||||||
, channels_(channels)
|
, channels_(channels)
|
||||||
, chunk_duration_seconds_(chunk_duration_seconds) {
|
, chunk_duration_seconds_(chunk_duration_seconds)
|
||||||
|
, chunk_step_seconds_(chunk_step_seconds > 0 ? chunk_step_seconds : chunk_duration_seconds) {
|
||||||
|
// If step not specified, use duration (no overlap)
|
||||||
}
|
}
|
||||||
|
|
||||||
AudioCapture::~AudioCapture() {
|
AudioCapture::~AudioCapture() {
|
||||||
@ -47,12 +49,17 @@ int AudioCapture::audioCallback(const void* input, void* output,
|
|||||||
|
|
||||||
// Check if we have accumulated enough data for a chunk
|
// Check if we have accumulated enough data for a chunk
|
||||||
size_t chunk_samples = self->sample_rate_ * self->channels_ * self->chunk_duration_seconds_;
|
size_t chunk_samples = self->sample_rate_ * self->channels_ * self->chunk_duration_seconds_;
|
||||||
|
size_t step_samples = self->sample_rate_ * self->channels_ * self->chunk_step_seconds_;
|
||||||
|
|
||||||
if (self->buffer_.size() >= chunk_samples) {
|
if (self->buffer_.size() >= chunk_samples) {
|
||||||
// Call the callback with the chunk
|
// Call the callback with the full chunk
|
||||||
if (self->callback_) {
|
if (self->callback_) {
|
||||||
self->callback_(self->buffer_);
|
// Send exactly chunk_samples (the window)
|
||||||
|
std::vector<float> chunk(self->buffer_.begin(), self->buffer_.begin() + chunk_samples);
|
||||||
|
self->callback_(chunk);
|
||||||
}
|
}
|
||||||
self->buffer_.clear();
|
// Sliding window: remove only step_samples, keep overlap for next chunk
|
||||||
|
self->buffer_.erase(self->buffer_.begin(), self->buffer_.begin() + step_samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
return paContinue;
|
return paContinue;
|
||||||
|
|||||||
@ -11,7 +11,8 @@ class AudioCapture {
|
|||||||
public:
|
public:
|
||||||
using AudioCallback = std::function<void(const std::vector<float>&)>;
|
using AudioCallback = std::function<void(const std::vector<float>&)>;
|
||||||
|
|
||||||
AudioCapture(int sample_rate, int channels, int chunk_duration_seconds);
|
// chunk_duration = window size, chunk_step = how often to send (overlap = duration - step)
|
||||||
|
AudioCapture(int sample_rate, int channels, int chunk_duration_seconds, int chunk_step_seconds = 0);
|
||||||
~AudioCapture();
|
~AudioCapture();
|
||||||
|
|
||||||
bool initialize();
|
bool initialize();
|
||||||
@ -29,6 +30,7 @@ private:
|
|||||||
int sample_rate_;
|
int sample_rate_;
|
||||||
int channels_;
|
int channels_;
|
||||||
int chunk_duration_seconds_;
|
int chunk_duration_seconds_;
|
||||||
|
int chunk_step_seconds_; // How often to emit chunks (0 = same as duration, no overlap)
|
||||||
bool is_recording_ = false;
|
bool is_recording_ = false;
|
||||||
|
|
||||||
PaStream* stream_ = nullptr;
|
PaStream* stream_ = nullptr;
|
||||||
|
|||||||
@ -10,6 +10,8 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
#include <cmath>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
namespace secondvoice {
|
namespace secondvoice {
|
||||||
|
|
||||||
@ -22,13 +24,19 @@ Pipeline::~Pipeline() {
|
|||||||
bool Pipeline::initialize() {
|
bool Pipeline::initialize() {
|
||||||
auto& config = Config::getInstance();
|
auto& config = Config::getInstance();
|
||||||
|
|
||||||
// Initialize audio capture
|
// Initialize audio capture with sliding window (overlap)
|
||||||
audio_capture_ = std::make_unique<AudioCapture>(
|
audio_capture_ = std::make_unique<AudioCapture>(
|
||||||
config.getAudioConfig().sample_rate,
|
config.getAudioConfig().sample_rate,
|
||||||
config.getAudioConfig().channels,
|
config.getAudioConfig().channels,
|
||||||
config.getAudioConfig().chunk_duration_seconds
|
config.getAudioConfig().chunk_duration_seconds,
|
||||||
|
config.getAudioConfig().chunk_step_seconds
|
||||||
);
|
);
|
||||||
|
|
||||||
|
std::cout << "[Pipeline] Audio: " << config.getAudioConfig().chunk_duration_seconds
|
||||||
|
<< "s window, " << config.getAudioConfig().chunk_step_seconds
|
||||||
|
<< "s step (overlap: " << (config.getAudioConfig().chunk_duration_seconds - config.getAudioConfig().chunk_step_seconds)
|
||||||
|
<< "s)" << std::endl;
|
||||||
|
|
||||||
if (!audio_capture_->initialize()) {
|
if (!audio_capture_->initialize()) {
|
||||||
std::cerr << "Failed to initialize audio capture" << std::endl;
|
std::cerr << "Failed to initialize audio capture" << std::endl;
|
||||||
return false;
|
return false;
|
||||||
@ -68,10 +76,9 @@ bool Pipeline::start() {
|
|||||||
|
|
||||||
running_ = true;
|
running_ = true;
|
||||||
|
|
||||||
// Start threads
|
// Start background threads (NOT UI - that runs in main thread)
|
||||||
audio_thread_ = std::thread(&Pipeline::audioThread, this);
|
audio_thread_ = std::thread(&Pipeline::audioThread, this);
|
||||||
processing_thread_ = std::thread(&Pipeline::processingThread, this);
|
processing_thread_ = std::thread(&Pipeline::processingThread, this);
|
||||||
ui_thread_ = std::thread(&Pipeline::uiThread, this);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -92,16 +99,13 @@ void Pipeline::stop() {
|
|||||||
audio_queue_.shutdown();
|
audio_queue_.shutdown();
|
||||||
transcription_queue_.shutdown();
|
transcription_queue_.shutdown();
|
||||||
|
|
||||||
// Wait for threads
|
// Wait for background threads
|
||||||
if (audio_thread_.joinable()) {
|
if (audio_thread_.joinable()) {
|
||||||
audio_thread_.join();
|
audio_thread_.join();
|
||||||
}
|
}
|
||||||
if (processing_thread_.joinable()) {
|
if (processing_thread_.joinable()) {
|
||||||
processing_thread_.join();
|
processing_thread_.join();
|
||||||
}
|
}
|
||||||
if (ui_thread_.joinable()) {
|
|
||||||
ui_thread_.join();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save full recording
|
// Save full recording
|
||||||
auto& config = Config::getInstance();
|
auto& config = Config::getInstance();
|
||||||
@ -153,6 +157,11 @@ void Pipeline::audioThread() {
|
|||||||
void Pipeline::processingThread() {
|
void Pipeline::processingThread() {
|
||||||
auto& config = Config::getInstance();
|
auto& config = Config::getInstance();
|
||||||
|
|
||||||
|
// VAD threshold - audio RMS must exceed this to be considered speech
|
||||||
|
// Higher values = more aggressive noise filtering
|
||||||
|
constexpr float VAD_THRESHOLD = 0.02f; // RMS energy threshold
|
||||||
|
constexpr float VAD_MIN_PEAK = 0.08f; // Minimum peak amplitude
|
||||||
|
|
||||||
while (running_) {
|
while (running_) {
|
||||||
auto chunk_opt = audio_queue_.wait_and_pop();
|
auto chunk_opt = audio_queue_.wait_and_pop();
|
||||||
if (!chunk_opt.has_value()) {
|
if (!chunk_opt.has_value()) {
|
||||||
@ -161,6 +170,22 @@ void Pipeline::processingThread() {
|
|||||||
|
|
||||||
auto& chunk = chunk_opt.value();
|
auto& chunk = chunk_opt.value();
|
||||||
|
|
||||||
|
// Voice Activity Detection - skip silent/noise chunks
|
||||||
|
float sum_squared = 0.0f;
|
||||||
|
float max_amplitude = 0.0f;
|
||||||
|
for (const float sample : chunk.data) {
|
||||||
|
sum_squared += sample * sample;
|
||||||
|
max_amplitude = std::max(max_amplitude, std::abs(sample));
|
||||||
|
}
|
||||||
|
float rms = std::sqrt(sum_squared / chunk.data.size());
|
||||||
|
|
||||||
|
if (rms < VAD_THRESHOLD || max_amplitude < VAD_MIN_PEAK) {
|
||||||
|
std::cout << "[Skip] Silent chunk (RMS: " << rms << ", Peak: " << max_amplitude << ")" << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "[Audio] Processing chunk (RMS: " << rms << ", Peak: " << max_amplitude << ")" << std::endl;
|
||||||
|
|
||||||
// Transcribe with Whisper
|
// Transcribe with Whisper
|
||||||
auto whisper_result = whisper_client_->transcribe(
|
auto whisper_result = whisper_client_->transcribe(
|
||||||
chunk.data,
|
chunk.data,
|
||||||
@ -178,9 +203,71 @@ void Pipeline::processingThread() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translate with Claude
|
// Filter out Whisper hallucinations (common when audio is silent/unclear)
|
||||||
|
std::string text = whisper_result->text;
|
||||||
|
// Trim whitespace
|
||||||
|
size_t start = text.find_first_not_of(" \t\n\r");
|
||||||
|
size_t end = text.find_last_not_of(" \t\n\r");
|
||||||
|
if (start == std::string::npos) {
|
||||||
|
std::cout << "[Skip] Empty transcription" << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
text = text.substr(start, end - start + 1);
|
||||||
|
|
||||||
|
// Skip known hallucinations and garbage
|
||||||
|
bool is_garbage = false;
|
||||||
|
|
||||||
|
// Too short to be meaningful
|
||||||
|
if (text.length() < 4) {
|
||||||
|
is_garbage = true;
|
||||||
|
}
|
||||||
|
// Known Whisper hallucinations
|
||||||
|
else if (text.find("Amara.org") != std::string::npos ||
|
||||||
|
text.find("amara.org") != std::string::npos ||
|
||||||
|
text.find("字幕") != std::string::npos ||
|
||||||
|
text.find("subtitle") != std::string::npos ||
|
||||||
|
text.find("Subtitle") != std::string::npos ||
|
||||||
|
text.find("Thank you") != std::string::npos ||
|
||||||
|
text.find("thanks for watching") != std::string::npos ||
|
||||||
|
text.find("Subscribe") != std::string::npos ||
|
||||||
|
text.find("谢谢观看") != std::string::npos ||
|
||||||
|
text.find("订阅") != std::string::npos ||
|
||||||
|
text.find("...") == 0) { // Starts with ellipsis
|
||||||
|
is_garbage = true;
|
||||||
|
}
|
||||||
|
// Only punctuation or whitespace
|
||||||
|
else {
|
||||||
|
bool has_content = false;
|
||||||
|
for (char c : text) {
|
||||||
|
if (std::isalnum(static_cast<unsigned char>(c)) || (c & 0x80)) {
|
||||||
|
has_content = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!has_content) {
|
||||||
|
is_garbage = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_garbage) {
|
||||||
|
std::cout << "[Skip] Filtered: " << text << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove overlap with previous transcription
|
||||||
|
std::string deduplicated = removeOverlap(text, last_transcription_);
|
||||||
|
last_transcription_ = text; // Store full text for next comparison
|
||||||
|
|
||||||
|
if (deduplicated.empty() || deduplicated.length() < 2) {
|
||||||
|
std::cout << "[Skip] Fully overlapping transcription" << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "[New content] " << deduplicated << std::endl;
|
||||||
|
|
||||||
|
// Translate with Claude (only the new, deduplicated content)
|
||||||
auto claude_result = claude_client_->translate(
|
auto claude_result = claude_client_->translate(
|
||||||
whisper_result->text,
|
deduplicated,
|
||||||
config.getClaudeConfig().system_prompt,
|
config.getClaudeConfig().system_prompt,
|
||||||
config.getClaudeConfig().max_tokens,
|
config.getClaudeConfig().max_tokens,
|
||||||
config.getClaudeConfig().temperature
|
config.getClaudeConfig().temperature
|
||||||
@ -192,19 +279,17 @@ void Pipeline::processingThread() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add to UI
|
// Add to UI
|
||||||
ui_->addTranslation(whisper_result->text, claude_result->text);
|
ui_->addTranslation(deduplicated, claude_result->text);
|
||||||
|
|
||||||
std::cout << "CN: " << whisper_result->text << std::endl;
|
std::cout << "CN: " << deduplicated << std::endl;
|
||||||
std::cout << "FR: " << claude_result->text << std::endl;
|
std::cout << "FR: " << claude_result->text << std::endl;
|
||||||
std::cout << "---" << std::endl;
|
std::cout << "---" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Pipeline::uiThread() {
|
void Pipeline::update() {
|
||||||
// CRITICAL: Make OpenGL context current in THIS thread (not the thread that created it)
|
if (!ui_) return;
|
||||||
ui_->makeContextCurrent();
|
|
||||||
|
|
||||||
while (running_ && !ui_->shouldClose()) {
|
|
||||||
ui_->setRecordingDuration(recording_duration_);
|
ui_->setRecordingDuration(recording_duration_);
|
||||||
ui_->setProcessingStatus("Processing...");
|
ui_->setProcessingStatus("Processing...");
|
||||||
ui_->render();
|
ui_->render();
|
||||||
@ -212,11 +297,51 @@ void Pipeline::uiThread() {
|
|||||||
// Check if stop was requested
|
// Check if stop was requested
|
||||||
if (ui_->isStopRequested()) {
|
if (ui_->isStopRequested()) {
|
||||||
running_ = false;
|
running_ = false;
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(16)); // ~60 FPS
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Pipeline::shouldClose() const {
|
||||||
|
return ui_ ? ui_->shouldClose() : true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Strips from the front of `current` the text it shares with the end of `previous`.
///
/// Matching is byte-wise. An overlap is only accepted when it leaves a
/// non-empty remainder; a candidate that would consume all of `current` is
/// rejected and the search moves on to shorter overlaps.
///
/// @param current  Newly transcribed text.
/// @param previous Text of the preceding transcription.
/// @return The part of `current` that follows the detected overlap, or
///         `current` unchanged when no overlap is found.
std::string Pipeline::removeOverlap(const std::string& current, const std::string& previous) {
    if (previous.empty()) {
        return current;
    }

    // Shortest shared run (in bytes) that pass 2 accepts as an overlap;
    // pass 1 only considers runs strictly longer than this.
    constexpr size_t min_overlap = 4;
    const size_t prev_len = previous.length();
    const size_t curr_len = current.length();

    // Pass 1: try suffixes of `previous`, longest first, beginning one third of
    // the way into it. The bound is written as `i + min_overlap < prev_len`
    // rather than `i < prev_len - min_overlap` so that it cannot wrap around
    // when `previous` is shorter than min_overlap.
    for (size_t i = prev_len / 3; i + min_overlap < prev_len; ++i) {
        const size_t overlap_len = prev_len - i;
        // The overlap must be a strict prefix of `current`. Comparing in place
        // avoids building a temporary suffix and scanning all of `current`.
        if (overlap_len < curr_len &&
            current.compare(0, overlap_len, previous, i, overlap_len) == 0) {
            std::cout << "[Overlap] Removed: \"" << previous.substr(i) << "\"" << std::endl;
            return current.substr(overlap_len);
        }
    }

    // Pass 2: compare the last `len` bytes of `previous` with the first `len`
    // bytes of `current`, for lengths up to half of the shorter string.
    // `len` never exceeds curr_len / 2, so the remainder is always non-empty.
    const size_t max_check = std::min(curr_len, prev_len) / 2;
    for (size_t len = max_check; len >= min_overlap; --len) {
        if (current.compare(0, len, previous, prev_len - len, len) == 0) {
            std::cout << "[Overlap] Partial match removed: \""
                      << previous.substr(prev_len - len) << "\"" << std::endl;
            return current.substr(len);
        }
    }

    return current;  // No overlap detected
}
|
||||||
|
|
||||||
} // namespace secondvoice
|
} // namespace secondvoice
|
||||||
|
|||||||
@ -34,12 +34,15 @@ public:
|
|||||||
bool start();
|
bool start();
|
||||||
void stop();
|
void stop();
|
||||||
|
|
||||||
|
// Call this from main thread for UI updates
|
||||||
|
void update();
|
||||||
|
|
||||||
bool isRunning() const { return running_; }
|
bool isRunning() const { return running_; }
|
||||||
|
bool shouldClose() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void audioThread();
|
void audioThread();
|
||||||
void processingThread();
|
void processingThread();
|
||||||
void uiThread();
|
|
||||||
|
|
||||||
std::unique_ptr<AudioCapture> audio_capture_;
|
std::unique_ptr<AudioCapture> audio_capture_;
|
||||||
std::unique_ptr<WhisperClient> whisper_client_;
|
std::unique_ptr<WhisperClient> whisper_client_;
|
||||||
@ -52,10 +55,13 @@ private:
|
|||||||
|
|
||||||
std::thread audio_thread_;
|
std::thread audio_thread_;
|
||||||
std::thread processing_thread_;
|
std::thread processing_thread_;
|
||||||
std::thread ui_thread_;
|
|
||||||
|
|
||||||
std::atomic<bool> running_{false};
|
std::atomic<bool> running_{false};
|
||||||
std::atomic<int> recording_duration_{0};
|
std::atomic<int> recording_duration_{0};
|
||||||
|
|
||||||
|
// For overlap deduplication
|
||||||
|
std::string last_transcription_;
|
||||||
|
std::string removeOverlap(const std::string& current, const std::string& previous);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace secondvoice
|
} // namespace secondvoice
|
||||||
|
|||||||
21
src/main.cpp
21
src/main.cpp
@ -4,6 +4,10 @@
|
|||||||
#include "utils/Config.h"
|
#include "utils/Config.h"
|
||||||
#include "core/Pipeline.h"
|
#include "core/Pipeline.h"
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
// Force NVIDIA GPU on Optimus systems (instead of integrated AMD/Intel)
|
// Force NVIDIA GPU on Optimus systems (instead of integrated AMD/Intel)
|
||||||
// These MUST be global and volatile to prevent linker optimization
|
// These MUST be global and volatile to prevent linker optimization
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
@ -38,6 +42,12 @@ int main(int argc, char** argv) {
|
|||||||
(void)argc;
|
(void)argc;
|
||||||
(void)argv;
|
(void)argv;
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
// Enable UTF-8 console output for Chinese characters
|
||||||
|
SetConsoleOutputCP(CP_UTF8);
|
||||||
|
SetConsoleCP(CP_UTF8);
|
||||||
|
#endif
|
||||||
|
|
||||||
log_msg("MAIN: Entry point reached");
|
log_msg("MAIN: Entry point reached");
|
||||||
log_msg("========================================");
|
log_msg("========================================");
|
||||||
log_msg("SecondVoice - Real-time Translation System");
|
log_msg("SecondVoice - Real-time Translation System");
|
||||||
@ -67,17 +77,18 @@ int main(int argc, char** argv) {
|
|||||||
log_msg("Pipeline initialized successfully");
|
log_msg("Pipeline initialized successfully");
|
||||||
log_msg("Starting recording and translation...");
|
log_msg("Starting recording and translation...");
|
||||||
|
|
||||||
// Start pipeline
|
// Start pipeline (background threads for audio + API calls)
|
||||||
log_msg("Starting pipeline...");
|
log_msg("Starting pipeline...");
|
||||||
if (!pipeline.start()) {
|
if (!pipeline.start()) {
|
||||||
log_msg("ERROR: Failed to start pipeline");
|
log_msg("ERROR: Failed to start pipeline");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for pipeline to finish (user clicks Stop button)
|
// Main loop - UI runs in main thread (required by GLFW)
|
||||||
log_msg("Pipeline running, waiting for user to stop...");
|
log_msg("Pipeline running, entering main loop...");
|
||||||
while (pipeline.isRunning()) {
|
while (pipeline.isRunning() && !pipeline.shouldClose()) {
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
pipeline.update(); // Render one frame
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(16)); // ~60 FPS
|
||||||
}
|
}
|
||||||
|
|
||||||
log_msg("");
|
log_msg("");
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#include <glad/glad.h> // MUST be FIRST! Provides OpenGL functions
|
#include <glad/glad.h> // MUST be FIRST! Provides OpenGL functions
|
||||||
#define GLFW_INCLUDE_NONE // Tell GLFW not to include OpenGL headers (GLAD does it)
|
#define GLFW_INCLUDE_NONE // Tell GLFW not to include OpenGL headers (GLAD does it)
|
||||||
#include "TranslationUI.h"
|
#include "TranslationUI.h"
|
||||||
|
#include "../utils/Config.h"
|
||||||
#include <imgui.h>
|
#include <imgui.h>
|
||||||
#include <imgui_impl_glfw.h>
|
#include <imgui_impl_glfw.h>
|
||||||
#include <imgui_impl_opengl3.h>
|
#include <imgui_impl_opengl3.h>
|
||||||
@ -86,6 +87,40 @@ bool TranslationUI::initialize() {
|
|||||||
ImGuiIO& io = ImGui::GetIO();
|
ImGuiIO& io = ImGui::GetIO();
|
||||||
io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard;
|
io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard;
|
||||||
|
|
||||||
|
// Load Chinese font from Windows system fonts
|
||||||
|
float font_size = static_cast<float>(Config::getInstance().getUIConfig().font_size);
|
||||||
|
std::cout << "[UI] Loading Chinese font (size: " << font_size << ")..." << std::endl;
|
||||||
|
ImFontConfig font_config;
|
||||||
|
font_config.OversampleH = 2;
|
||||||
|
font_config.OversampleV = 2;
|
||||||
|
|
||||||
|
// Try common Chinese fonts on Windows
|
||||||
|
const char* chinese_fonts[] = {
|
||||||
|
"C:\\Windows\\Fonts\\msyh.ttc", // Microsoft YaHei
|
||||||
|
"C:\\Windows\\Fonts\\simhei.ttf", // SimHei
|
||||||
|
"C:\\Windows\\Fonts\\simsun.ttc", // SimSun
|
||||||
|
"C:\\Windows\\Fonts\\mingliub.ttc", // MingLiU
|
||||||
|
};
|
||||||
|
|
||||||
|
ImFont* font = nullptr;
|
||||||
|
for (const char* font_path : chinese_fonts) {
|
||||||
|
font = io.Fonts->AddFontFromFileTTF(
|
||||||
|
font_path,
|
||||||
|
font_size,
|
||||||
|
&font_config,
|
||||||
|
io.Fonts->GetGlyphRangesChineseFull()
|
||||||
|
);
|
||||||
|
if (font) {
|
||||||
|
std::cout << "[UI] Loaded font: " << font_path << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!font) {
|
||||||
|
std::cout << "[UI] Warning: No Chinese font found, using default (Chinese chars won't display)" << std::endl;
|
||||||
|
io.Fonts->AddFontDefault();
|
||||||
|
}
|
||||||
|
|
||||||
ImGui::StyleColorsDark();
|
ImGui::StyleColorsDark();
|
||||||
|
|
||||||
ImGui_ImplGlfw_InitForOpenGL(window_, true);
|
ImGui_ImplGlfw_InitForOpenGL(window_, true);
|
||||||
@ -130,13 +165,12 @@ void main() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Let ImGui auto-detect the GLSL version
|
// Explicitly specify GLSL version for OpenGL 3.3 core profile
|
||||||
ImGui_ImplOpenGL3_Init(nullptr);
|
// On Windows, auto-detection (nullptr) can fail, so we specify "#version 330"
|
||||||
|
std::cout << "[UI] Initializing ImGui OpenGL3 backend with GLSL 330..." << std::endl;
|
||||||
|
ImGui_ImplOpenGL3_Init("#version 330");
|
||||||
|
|
||||||
// CRITICAL: Release the OpenGL context from this thread
|
// Context stays in main thread - no need to release
|
||||||
// The UI rendering will happen in a separate thread which will call makeContextCurrent()
|
|
||||||
std::cout << "[UI] Releasing OpenGL context from initialization thread" << std::endl;
|
|
||||||
glfwMakeContextCurrent(nullptr);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -71,7 +71,8 @@ bool Config::load(const std::string& config_path, const std::string& env_path) {
|
|||||||
audio_config_.sample_rate = audio.value("sample_rate", 16000);
|
audio_config_.sample_rate = audio.value("sample_rate", 16000);
|
||||||
audio_config_.channels = audio.value("channels", 1);
|
audio_config_.channels = audio.value("channels", 1);
|
||||||
audio_config_.chunk_duration_seconds = audio.value("chunk_duration_seconds", 10);
|
audio_config_.chunk_duration_seconds = audio.value("chunk_duration_seconds", 10);
|
||||||
audio_config_.format = audio.value("format", "wav");
|
audio_config_.chunk_step_seconds = audio.value("chunk_step_seconds", 0); // 0 = no overlap
|
||||||
|
audio_config_.format = audio.value("format", "ogg");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse whisper config
|
// Parse whisper config
|
||||||
|
|||||||
@ -8,6 +8,7 @@ struct AudioConfig {
|
|||||||
int sample_rate;
|
int sample_rate;
|
||||||
int channels;
|
int channels;
|
||||||
int chunk_duration_seconds;
|
int chunk_duration_seconds;
|
||||||
|
int chunk_step_seconds; // How often to emit chunks (for overlap)
|
||||||
std::string format;
|
std::string format;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -3,7 +3,6 @@
|
|||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"dependencies": [
|
"dependencies": [
|
||||||
"portaudio",
|
"portaudio",
|
||||||
"cpp-httplib",
|
|
||||||
"nlohmann-json",
|
"nlohmann-json",
|
||||||
"glfw3",
|
"glfw3",
|
||||||
"glad",
|
"glad",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user