secondvoice/src/main.cpp
Trouve Alexis fa8ea2907b feat: Major improvements - WinHTTP, gpt-4o-mini, Opus, sliding window
- Replace cpp-httplib with native WinHTTP for HTTPS support
- Switch from whisper-1 to gpt-4o-mini-transcribe model
- Use Opus/OGG encoding instead of WAV (~10x smaller files)
- Implement sliding window audio capture with overlap
- Add transcription deduplication for overlapping segments
- Add Voice Activity Detection (VAD) to filter silence/noise
- Filter Whisper hallucinations (Amara.org, etc.)
- Add UTF-8 console support for Chinese characters
- Add Chinese font loading in ImGui
- Make Claude responses concise (translation only, no explanations)
- Configurable window size, font size, chunk duration/step

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-23 12:17:41 +08:00

104 lines
2.9 KiB
C++

#include <chrono>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>
#include "utils/Config.h"
#include "core/Pipeline.h"
#ifdef _WIN32
#include <windows.h>
#endif
// Request the discrete (high-performance) GPU on hybrid-graphics systems
// instead of the integrated one.
//
// Per the vendor documentation, the NVIDIA and AMD drivers look these two
// variables up by name in the executable's export table, so they must be
// global data symbols with C linkage that are actually *exported* from the
// .exe. On Windows (PE) targets that is done with __declspec(dllexport),
// which MSVC, MinGW GCC and clang all accept. An ELF-style
// visibility("default") attribute does not create a PE export.
//
// Exported symbols are retained by the linker, so no extra "used" marker or
// dummy reference table is needed to keep them alive.
#ifdef _WIN32
extern "C" {
// NVIDIA Optimus: 0x00000001 requests the high-performance GPU.
__declspec(dllexport) volatile unsigned long NvOptimusEnablement = 0x00000001;
// AMD PowerXpress: 1 requests the high-performance GPU.
__declspec(dllexport) volatile int AmdPowerXpressRequestHighPerformance = 1;
}
#endif
// Minimal debug logger.
// Appends `msg` plus a newline to "main_debug.log" (the file is opened in
// append mode on every call and closed again when the function returns) and
// echoes the same line to stdout. Both streams are flushed before returning.
void log_msg(const std::string& msg) {
    std::ofstream sink("main_debug.log", std::ios::app);
    sink << msg << '\n';
    sink.flush();

    std::cout << msg << '\n' << std::flush;
}
int main(int argc, char** argv) {
(void)argc;
(void)argv;
#ifdef _WIN32
// Enable UTF-8 console output for Chinese characters
SetConsoleOutputCP(CP_UTF8);
SetConsoleCP(CP_UTF8);
#endif
log_msg("MAIN: Entry point reached");
log_msg("========================================");
log_msg("SecondVoice - Real-time Translation System");
log_msg("========================================");
log_msg("Starting application...");
// Load configuration
log_msg("Loading configuration...");
secondvoice::Config& config = secondvoice::Config::getInstance();
if (!config.load("config.json", ".env")) {
log_msg("ERROR: Failed to load configuration");
return 1;
}
log_msg("Configuration loaded successfully");
log_msg("");
// Create and initialize pipeline
log_msg("Creating pipeline...");
secondvoice::Pipeline pipeline;
log_msg("Initializing pipeline...");
if (!pipeline.initialize()) {
log_msg("ERROR: Failed to initialize pipeline");
return 1;
}
log_msg("Pipeline initialized successfully");
log_msg("Starting recording and translation...");
// Start pipeline (background threads for audio + API calls)
log_msg("Starting pipeline...");
if (!pipeline.start()) {
log_msg("ERROR: Failed to start pipeline");
return 1;
}
// Main loop - UI runs in main thread (required by GLFW)
log_msg("Pipeline running, entering main loop...");
while (pipeline.isRunning() && !pipeline.shouldClose()) {
pipeline.update(); // Render one frame
std::this_thread::sleep_for(std::chrono::milliseconds(16)); // ~60 FPS
}
log_msg("");
log_msg("Recording stopped");
log_msg("Saving audio...");
pipeline.stop();
log_msg("Done!");
return 0;
}