feat: Add GLAD OpenGL loader and NVIDIA GPU forcing

Changes:
- Add GLAD dependency via vcpkg for proper OpenGL function loading
- Force NVIDIA GPU usage with game-style exports (NvOptimusEnablement)
- Create working console version (SecondVoice_Console.exe)
- Add dual executable build (UI + Console versions)
- Update to OpenGL 4.6 Core Profile with GLSL 460
- Add GPU detection and logging
- Fix GLFW header conflicts with GLFW_INCLUDE_NONE

Note: OpenGL shaders still failing to compile despite GLAD integration.
Console version is fully functional for audio capture and translation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
StillHammer 2025-11-21 15:18:54 +08:00
parent 07b792b2bd
commit ddf34db2a0
11 changed files with 454 additions and 65 deletions

View File

@ -5,16 +5,23 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Force static linking for MinGW
if(MINGW)
set(BUILD_SHARED_LIBS OFF)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
endif()
# Find packages
find_package(portaudio CONFIG REQUIRED)
find_package(httplib CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(imgui CONFIG REQUIRED)
find_package(glfw3 CONFIG REQUIRED)
find_package(glad CONFIG REQUIRED)
find_package(OpenGL REQUIRED)
# Source files
set(SOURCES
# Source files for UI version
set(SOURCES_UI
src/main.cpp
# Audio module
src/audio/AudioCapture.cpp
@ -30,23 +37,76 @@ set(SOURCES
src/core/Pipeline.cpp
)
# Executable
add_executable(${PROJECT_NAME} ${SOURCES})
# Source files for Console version (NO UI, NO Pipeline)
set(SOURCES_CONSOLE
src/main_console.cpp
# Audio module
src/audio/AudioCapture.cpp
src/audio/AudioBuffer.cpp
# API clients
src/api/WhisperClient.cpp
src/api/ClaudeClient.cpp
# Utils
src/utils/Config.cpp
# Core: Pipeline.cpp is deliberately left out of the console build because it depends on the UI
# src/core/Pipeline.cpp
)
# UI Executable
add_executable(${PROJECT_NAME} ${SOURCES_UI})
# Console Executable (NO UI)
add_executable(${PROJECT_NAME}_Console ${SOURCES_CONSOLE})
# Include directories
target_include_directories(${PROJECT_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)
target_include_directories(${PROJECT_NAME}_Console PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)
# Link libraries
if(MINGW AND NOT BUILD_SHARED_LIBS)
# Static linking for MinGW - need to add Windows system libs for portaudio
target_link_libraries(${PROJECT_NAME} PRIVATE
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libportaudio.a
httplib::httplib
nlohmann_json::nlohmann_json
imgui::imgui
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libglfw3.a
glad::glad
OpenGL::GL
# Windows system libraries for portaudio
winmm
setupapi
)
# Console version - NO UI libs
target_link_libraries(${PROJECT_NAME}_Console PRIVATE
${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/x64-mingw-static/lib/libportaudio.a
httplib::httplib
nlohmann_json::nlohmann_json
# Windows system libraries for portaudio
winmm
setupapi
)
else()
target_link_libraries(${PROJECT_NAME} PRIVATE
portaudio
httplib::httplib
nlohmann_json::nlohmann_json
imgui::imgui
glfw
glad::glad
OpenGL::GL
)
# Console version - NO UI libs
target_link_libraries(${PROJECT_NAME}_Console PRIVATE
portaudio
httplib::httplib
nlohmann_json::nlohmann_json
)
endif()
# Compiler options
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
@ -55,14 +115,27 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
-Wextra
-Wpedantic
)
target_compile_options(${PROJECT_NAME}_Console PRIVATE
-Wall
-Wextra
-Wpedantic
)
# MinGW: cpp-httplib's GetAddrInfoExCancel is not available
# Don't treat warnings as errors for MinGW due to httplib incompatibilities
if(NOT MINGW)
target_compile_options(${PROJECT_NAME} PRIVATE -Werror)
target_compile_options(${PROJECT_NAME}_Console PRIVATE -Werror)
else()
# Force console subsystem on Windows (not GUI subsystem)
target_link_options(${PROJECT_NAME} PRIVATE -mconsole)
target_link_options(${PROJECT_NAME}_Console PRIVATE -mconsole)
# FORCE GPU exports using .def file (like games do!)
target_link_options(${PROJECT_NAME} PRIVATE
-Wl,--export-all-symbols
${CMAKE_CURRENT_SOURCE_DIR}/src/gpu_exports.def
)
endif()
endif()

View File

@ -64,7 +64,7 @@
"binaryDir": "${sourceDir}/build/mingw-${presetName}",
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
"VCPKG_TARGET_TRIPLET": "x64-mingw-dynamic",
"VCPKG_TARGET_TRIPLET": "x64-mingw-static",
"VCPKG_OVERLAY_TRIPLETS": "$env{VCPKG_ROOT}/triplets/community",
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_C_COMPILER": "gcc",

110
WINDOWS_BUILD.md Normal file
View File

@ -0,0 +1,110 @@
# SecondVoice - Windows Build Guide
## Status
**Build réussi** - L'application compile et s'exécute
⚠️ **Problème GPU** - Utilise le GPU AMD intégré au lieu du NVIDIA RTX 4060
## Ce qui fonctionne
- ✅ Compilation MinGW sans Visual Studio
- ✅ Chargement de la configuration (config.json)
- ✅ Initialisation de PortAudio (capture audio)
- ✅ Création de la fenêtre GLFW
- ✅ Pipeline de traduction démarré
- ✅ Build statique (exe standalone de 6.5 MB)
## Problème actuel : Shaders OpenGL
Les shaders GLSL ne compilent pas car Windows utilise le **GPU AMD intégré** au lieu du **NVIDIA RTX 4060**.
### Diagnostic
Lancez `test_opengl.exe` dans `build\mingw-Release\` pour voir quel GPU est utilisé :
```bash
cd build\mingw-Release
test_opengl.exe
```
Résultat actuel : **AMD Radeon Graphics** (devrait être NVIDIA GeForce RTX 4060)
## Solutions
### Solution 1: Lancer depuis Windows (pas depuis bash)
**Le GPU NVIDIA ne sera utilisé que si vous lancez depuis Windows natif, pas depuis bash/terminal.**
Double-cliquez sur le **raccourci "SecondVoice" sur votre bureau** créé automatiquement.
OU allez dans l'explorateur Windows :
```
C:\Users\alexi\Documents\projects\secondvoice\build\mingw-Release\SecondVoice.exe
```
### Solution 2: Configuration Windows manuelle
Si le problème persiste :
1. **Windows Settings** (Win + I)
2. **Système** → **Affichage** → **Graphiques**
3. Cliquez **Parcourir** et sélectionnez :
```
C:\Users\alexi\Documents\projects\secondvoice\build\mingw-Release\SecondVoice.exe
```
4. Cliquez **Options** → **Hautes performances**
5. **Enregistrer**
### Solution 3: NVIDIA Control Panel
1. Clic droit sur le bureau → **Panneau de configuration NVIDIA**
2. **Gérer les paramètres 3D** → **Paramètres du programme**
3. **Ajouter** → Sélectionnez `SecondVoice.exe`
4. **Processeur graphique** → **NVIDIA haute performance**
5. **Appliquer**
## Build
### Rebuild complet
```bash
.\build_mingw.bat --clean
```
### Build incrémental
```bash
.\build_mingw.bat
```
### Configuration GPU (déjà fait)
```bash
powershell -ExecutionPolicy Bypass -File set_gpu.ps1
```
## Prérequis pour exécution
1. **Clés API** dans `.env` :
```
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
```
2. **Microphone** connecté
3. **config.json** (copié automatiquement dans build/)
## Fichiers importants
- `build\mingw-Release\SecondVoice.exe` - Exécutable principal (6.5 MB)
- `build\mingw-Release\test_opengl.exe` - Outil de diagnostic GPU
- `build\mingw-Release\config.json` - Configuration
- `build\mingw-Release\.env` - Clés API (à créer)
## Scripts utiles
- `build_mingw.bat` - Build l'application
- `set_gpu.ps1` - Configure le GPU NVIDIA
- `create_shortcut.ps1` - Crée un raccourci bureau
- `run_secondvoice.bat` - Lance l'application

View File

@ -111,16 +111,16 @@ if /i "%BUILD_TYPE%"=="Debug" (
REM Configure with CMake - force MinGW triplet via environment variable
echo [INFO] Configuring CMake for MinGW build...
echo [INFO] Forcing vcpkg triplet: x64-mingw-dynamic
set VCPKG_DEFAULT_TRIPLET=x64-mingw-dynamic
set VCPKG_DEFAULT_HOST_TRIPLET=x64-mingw-dynamic
echo [INFO] Forcing vcpkg triplet: x64-mingw-static
set VCPKG_DEFAULT_TRIPLET=x64-mingw-static
set VCPKG_DEFAULT_HOST_TRIPLET=x64-mingw-static
cmake -B build/mingw-%BUILD_TYPE% ^
-G Ninja ^
-DCMAKE_BUILD_TYPE=%BUILD_TYPE% ^
-DCMAKE_TOOLCHAIN_FILE=%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake ^
-DVCPKG_TARGET_TRIPLET=x64-mingw-dynamic ^
-DVCPKG_HOST_TRIPLET=x64-mingw-dynamic ^
-DVCPKG_TARGET_TRIPLET=x64-mingw-static ^
-DVCPKG_HOST_TRIPLET=x64-mingw-static ^
-DCMAKE_C_COMPILER=gcc ^
-DCMAKE_CXX_COMPILER=g++ ^
-DCMAKE_MAKE_PROGRAM=ninja

3
src/gpu_exports.def Normal file
View File

@ -0,0 +1,3 @@
EXPORTS
NvOptimusEnablement DATA
AmdPowerXpressRequestHighPerformance DATA

View File

@ -1,82 +1,92 @@
#include <iostream>
#include <fstream>
#include <string>
#include "utils/Config.h"
#include "utils/Logger.h"
#include "core/Pipeline.h"
// Ask hybrid-graphics drivers (NVIDIA Optimus / AMD PowerXpress) to run this
// process on the discrete GPU instead of the integrated one.
//
// The drivers look these two variables up by name in the executable's export
// table, so they must be real PE exports with C linkage. On Windows that is
// what __declspec(dllexport) does; the ELF-style visibility("default")
// attribute has no effect on PE targets and does not create an export entry.
// dllexport also keeps the symbols from being discarded as unreferenced, so
// no dummy reference array is needed.
#ifdef _WIN32
#include <windows.h>
extern "C" {
// NVIDIA Optimus: a non-zero value requests the high-performance GPU.
__declspec(dllexport) volatile unsigned long NvOptimusEnablement = 0x00000001;
// AMD PowerXpress: a non-zero value requests the high-performance GPU.
__declspec(dllexport) volatile int AmdPowerXpressRequestHighPerformance = 1;
}
#endif
// Simple file logger
// Minimal debug logger: appends `msg` as one line to "main_debug.log"
// (opened in append mode on every call) and echoes the same line to stdout.
// Both streams are flushed so the output survives an abrupt exit.
void log_msg(const std::string& msg) {
    std::ofstream sink("main_debug.log", std::ios::app);
    sink << msg;
    sink << std::endl;
    sink.flush();

    std::cout << msg;
    std::cout << std::endl;
}
// Entry point of the UI build.
//
// Sequence: load configuration, construct and initialize the pipeline, start
// it, poll until the pipeline reports it is no longer running, then stop it.
// Returns 1 when configuration loading, pipeline initialization or pipeline
// start fails; returns 0 otherwise.
//
// NOTE(review): LOG(...) and log_msg(...) calls appear side by side below with
// the same text; this looks like the before/after of a diff - confirm that
// only one set is meant to remain.
// NOTE(review): the MessageBoxA calls are debug popups; each one presumably
// blocks startup until dismissed - confirm they should ship.
int main(int argc, char** argv) {
(void)argc; // Unused
(void)argv; // Unused
(void)argc;
(void)argv;
#ifdef _WIN32
MessageBoxA(NULL, "SecondVoice starting...", "Debug", MB_OK);
#endif
LOG("========================================");
LOG("SecondVoice - Real-time Translation System");
LOG("========================================");
LOG("Starting application...");
log_msg("MAIN: Entry point reached");
log_msg("========================================");
log_msg("SecondVoice - Real-time Translation System");
log_msg("========================================");
log_msg("Starting application...");
// Load configuration from config.json and .env (paths relative to the working directory)
LOG("Loading configuration...");
log_msg("Loading configuration...");
secondvoice::Config& config = secondvoice::Config::getInstance();
if (!config.load("config.json", ".env")) {
LOG("ERROR: Failed to load configuration");
#ifdef _WIN32
MessageBoxA(NULL, "Failed to load configuration!", "Error", MB_OK | MB_ICONERROR);
#endif
log_msg("ERROR: Failed to load configuration");
return 1;
}
LOG("Configuration loaded successfully");
LOG("");
#ifdef _WIN32
MessageBoxA(NULL, "Config OK - Creating pipeline...", "Debug", MB_OK);
#endif
log_msg("Configuration loaded successfully");
log_msg("");
// Create and initialize pipeline
LOG("Creating pipeline...");
log_msg("Creating pipeline...");
secondvoice::Pipeline pipeline;
LOG("Initializing pipeline...");
log_msg("Initializing pipeline...");
if (!pipeline.initialize()) {
LOG("ERROR: Failed to initialize pipeline");
#ifdef _WIN32
MessageBoxA(NULL, "Failed to initialize pipeline!", "Error", MB_OK | MB_ICONERROR);
#endif
log_msg("ERROR: Failed to initialize pipeline");
return 1;
}
#ifdef _WIN32
MessageBoxA(NULL, "Pipeline initialized!", "Debug", MB_OK);
#endif
LOG("Pipeline initialized successfully");
LOG("Starting recording and translation...");
log_msg("Pipeline initialized successfully");
log_msg("Starting recording and translation...");
// Start pipeline
LOG("Starting pipeline...");
log_msg("Starting pipeline...");
if (!pipeline.start()) {
LOG("ERROR: Failed to start pipeline");
log_msg("ERROR: Failed to start pipeline");
return 1;
}
// Wait for pipeline to finish (user clicks Stop button)
LOG("Pipeline running, waiting for user to stop...");
log_msg("Pipeline running, waiting for user to stop...");
// Poll every 100 ms rather than blocking on the pipeline.
// NOTE(review): std::this_thread / std::chrono need <thread> and <chrono>,
// which are not among the includes visible at the top of this file -
// presumably pulled in transitively; confirm.
while (pipeline.isRunning()) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
LOG("");
LOG("Recording stopped");
LOG("Saving audio...");
log_msg("");
log_msg("Recording stopped");
log_msg("Saving audio...");
pipeline.stop();
LOG("Done!");
LOG("Check secondvoice_debug.log for details");
log_msg("Done!");
return 0;
}

155
src/main_console.cpp Normal file
View File

@ -0,0 +1,155 @@
#include <iostream>
#include <atomic>
#include <csignal>
#include <thread>
#include <chrono>
#include <vector>
#include <fstream>
#include <ctime>
#include <mutex>
#include "utils/Config.h"
#include "audio/AudioCapture.h"
#include "audio/AudioBuffer.h"
#include "api/WhisperClient.h"
#include "api/ClaudeClient.h"
// Cleared by signalHandler to ask the main loop to exit.
std::atomic<bool> g_running{true};

// A signal handler may only touch lock-free atomics (or volatile sig_atomic_t).
static_assert(std::atomic<bool>::is_always_lock_free,
              "g_running must be lock-free to be written from a signal handler");

// SIGINT / SIGTERM handler: requests a clean shutdown by clearing g_running.
//
// Deliberately does no I/O: iostream operations are not async-signal-safe, so
// writing to std::cout here would be undefined behaviour. User feedback
// belongs in the code that observes the flag, after the handler has returned.
// Any other signal number is ignored.
void signalHandler(int signum) {
    if (signum == SIGINT || signum == SIGTERM) {
        g_running = false;
    }
}
// Console entry point: captures audio in chunks and, for each chunk, saves a
// WAV file, transcribes it with Whisper and translates the transcript with
// Claude, printing every step to stdout.
//
// Runs until g_running is cleared (SIGINT / SIGTERM), then stops the capture.
// Returns 1 if configuration loading, audio initialization or starting the
// recording fails; returns 0 after a normal shutdown.
int main() {
    std::signal(SIGINT, signalHandler);
    std::signal(SIGTERM, signalHandler);

    std::cout << "========================================" << std::endl;
    std::cout << "SecondVoice - Console Mode" << std::endl;
    std::cout << "========================================" << std::endl;
    std::cout << "Real-time Chinese to French Translation" << std::endl;
    std::cout << std::endl;

    // Load config
    std::cout << "[1/4] Loading configuration..." << std::endl;
    secondvoice::Config& config = secondvoice::Config::getInstance();
    if (!config.load("config.json", ".env")) {
        std::cerr << "ERROR: Failed to load configuration" << std::endl;
        return 1;
    }
    std::cout << " ✓ Configuration loaded" << std::endl;

    // Initialize audio
    std::cout << "[2/4] Initializing audio capture..." << std::endl;
    const auto& audio_cfg = config.getAudioConfig();
    secondvoice::AudioCapture audio(
        audio_cfg.sample_rate,
        audio_cfg.channels,
        audio_cfg.chunk_duration_seconds
    );
    if (!audio.initialize()) {
        std::cerr << "ERROR: Failed to initialize audio" << std::endl;
        return 1;
    }
    std::cout << " ✓ Audio initialized (16kHz, mono)" << std::endl;

    // Initialize API clients
    std::cout << "[3/4] Initializing API clients..." << std::endl;
    secondvoice::WhisperClient whisper(config.getOpenAIKey());
    secondvoice::ClaudeClient claude(config.getAnthropicKey());
    std::cout << " ✓ Whisper API ready" << std::endl;
    std::cout << " ✓ Claude API ready" << std::endl;

    // Number of chunks delivered so far. Atomic because the callback below
    // increments it while the main loop reads it; the callback presumably runs
    // on the capture thread (verify against AudioCapture), which would make a
    // plain int a data race.
    std::atomic<int> chunk_count{0};

    // Audio callback: save -> transcribe -> translate one captured chunk.
    // Any failing step aborts processing of that chunk only.
    auto audioCallback = [&](const std::vector<float>& data) {
        const int chunk_no = ++chunk_count;
        std::cout << "\n📼 Chunk #" << chunk_no << " captured (" << data.size() << " samples)" << std::endl;

        // Create audio buffer
        secondvoice::AudioBuffer buffer(audio_cfg.sample_rate, audio_cfg.channels);
        buffer.addSamples(data);

        // Save WAV, named after the current Unix time in seconds
        const std::time_t now = std::time(nullptr);
        const std::string filename = "chunk_" + std::to_string(now) + ".wav";
        if (!buffer.saveToWav(filename)) {
            std::cerr << " ✗ Failed to save WAV" << std::endl;
            return;
        }
        std::cout << " ✓ Saved: " << filename << std::endl;

        // Transcribe; the "\r" in the result lines overwrites the progress line
        std::cout << " 🔄 Transcribing with Whisper..." << std::flush;
        const auto& whisper_cfg = config.getWhisperConfig();
        const auto transcription = whisper.transcribe(
            data,
            audio_cfg.sample_rate,
            audio_cfg.channels,
            whisper_cfg.language,
            whisper_cfg.prompt
        );
        if (!transcription) {
            std::cout << "\r ✗ Transcription failed" << std::endl;
            return;
        }
        const std::string& chinese = transcription->text;
        std::cout << "\r ✓ Transcribed" << std::endl;
        std::cout << " 中文: " << chinese << std::endl;

        // Translate
        std::cout << " 🔄 Translating with Claude..." << std::flush;
        const auto& claude_cfg = config.getClaudeConfig();
        const auto translation = claude.translate(
            chinese,
            claude_cfg.system_prompt,
            claude_cfg.max_tokens,
            claude_cfg.temperature
        );
        if (!translation) {
            std::cout << "\r ✗ Translation failed" << std::endl;
            return;
        }
        std::cout << "\r ✓ Translated" << std::endl;
        std::cout << " 🇫🇷 FR: " << translation->text << std::endl;
    };

    // Start recording
    std::cout << "[4/4] Starting audio recording..." << std::endl;
    if (!audio.start(audioCallback)) {
        std::cerr << "ERROR: Failed to start recording" << std::endl;
        return 1;
    }

    std::cout << std::endl;
    std::cout << "========================================" << std::endl;
    std::cout << "🎤 RECORDING - Speak in Chinese!" << std::endl;
    std::cout << "========================================" << std::endl;
    std::cout << "Press Ctrl+C to stop" << std::endl;
    std::cout << std::endl;

    // Idle until a signal clears g_running; every 10 s, remind the user that
    // nothing has been captured yet.
    int seconds = 0;
    while (g_running) {
        std::this_thread::sleep_for(std::chrono::seconds(1));
        seconds++;
        if (seconds % 10 == 0 && chunk_count.load() == 0) {
            std::cout << "[" << seconds << "s] Waiting for audio... (Ctrl+C to stop)" << std::endl;
        }
    }

    std::cout << "\n========================================" << std::endl;
    std::cout << "Stopping..." << std::endl;
    // Stop capture before the callback's captured locals go out of scope.
    audio.stop();
    std::cout << "✓ Done! Processed " << chunk_count.load() << " chunks in " << seconds << " seconds" << std::endl;
    std::cout << "========================================" << std::endl;
    return 0;
}

View File

@ -1,3 +1,5 @@
#include <glad/glad.h> // MUST be FIRST! Provides OpenGL functions
#define GLFW_INCLUDE_NONE // Tell GLFW not to include OpenGL headers (GLAD does it)
#include "TranslationUI.h"
#include <imgui.h>
#include <imgui_impl_glfw.h>
@ -34,10 +36,14 @@ bool TranslationUI::initialize() {
}
std::cout << "[UI] GLFW initialized successfully" << std::endl;
// OpenGL 3.3 + GLSL 330
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
// FORCE high-performance GPU (NVIDIA/AMD dedicated)
std::cout << "[UI] Requesting high-performance GPU..." << std::endl;
// OpenGL 4.6 Core Profile to match NVIDIA driver
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
// Create window
std::cout << "[UI] Creating GLFW window (" << width_ << "x" << height_ << ")..." << std::endl;
@ -52,6 +58,28 @@ bool TranslationUI::initialize() {
glfwMakeContextCurrent(window_);
glfwSwapInterval(1); // Enable vsync
// Initialize GLAD - MUST happen after glfwMakeContextCurrent!
std::cout << "[UI] Initializing GLAD OpenGL loader..." << std::endl;
if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress)) {
std::cerr << "[UI] Failed to initialize GLAD!" << std::endl;
glfwDestroyWindow(window_);
glfwTerminate();
return false;
}
std::cout << "[UI] GLAD initialized successfully" << std::endl;
// Query OpenGL context to see which GPU we got
const GLubyte* vendor = glGetString(GL_VENDOR);
const GLubyte* renderer = glGetString(GL_RENDERER);
const GLubyte* version = glGetString(GL_VERSION);
std::cout << "[UI] ========================================" << std::endl;
std::cout << "[UI] OpenGL Context Info:" << std::endl;
std::cout << "[UI] Vendor: " << (vendor ? (const char*)vendor : "Unknown") << std::endl;
std::cout << "[UI] Renderer: " << (renderer ? (const char*)renderer : "Unknown") << std::endl;
std::cout << "[UI] Version: " << (version ? (const char*)version : "Unknown") << std::endl;
std::cout << "[UI] ========================================" << std::endl;
// Initialize ImGui
IMGUI_CHECKVERSION();
ImGui::CreateContext();
@ -61,7 +89,8 @@ bool TranslationUI::initialize() {
ImGui::StyleColorsDark();
ImGui_ImplGlfw_InitForOpenGL(window_, true);
ImGui_ImplOpenGL3_Init("#version 330");
// Use GLSL 460 core to match OpenGL 4.6
ImGui_ImplOpenGL3_Init("#version 460 core");
return true;
}

View File

@ -2,6 +2,7 @@
#include <string>
#include <vector>
#define GLFW_INCLUDE_NONE // Don't include OpenGL headers (GLAD provides them)
#include <GLFW/glfw3.h>
namespace secondvoice {

View File

@ -44,18 +44,25 @@ bool Config::load(const std::string& config_path, const std::string& env_path) {
}
// Load config.json
std::cerr << "[Config] Opening config file: " << config_path << std::endl;
std::ifstream config_file(config_path);
if (!config_file.is_open()) {
std::cerr << "Error: Could not open config file: " << config_path << std::endl;
return false;
}
std::cerr << "[Config] File opened successfully" << std::endl;
json config_json;
try {
std::cerr << "[Config] About to parse JSON..." << std::endl;
config_file >> config_json;
std::cerr << "[Config] JSON parsed successfully" << std::endl;
} catch (const json::parse_error& e) {
std::cerr << "Error parsing config.json: " << e.what() << std::endl;
return false;
} catch (...) {
std::cerr << "Unknown error parsing config.json" << std::endl;
return false;
}
// Parse audio config

View File

@ -10,6 +10,7 @@
"features": ["glfw-binding", "opengl3-binding"]
},
"glfw3",
"glad",
"opengl"
]
}