feat: Implement complete MVP architecture for SecondVoice

Complete implementation of the real-time Chinese-to-French translation system:

Architecture:
- 3-threaded pipeline: Audio capture → AI processing → UI rendering
- Thread-safe queues for inter-thread communication
- Configurable audio chunk sizes for latency tuning

Core Features:
- Audio capture with PortAudio (configurable sample rate/channels)
- Whisper API integration for Chinese speech-to-text
- Claude API integration for Chinese-to-French translation
- ImGui real-time display with stop button
- Full recording saved to WAV on stop

Modules Implemented:
- audio/: AudioCapture (PortAudio wrapper) + AudioBuffer (WAV export)
- api/: WhisperClient + ClaudeClient (HTTP API wrappers)
- ui/: TranslationUI (ImGui interface)
- core/: Pipeline (orchestrates all threads)
- utils/: Config (JSON/.env loader) + ThreadSafeQueue (template)

Build System:
- CMake with vcpkg for dependency management
- vcpkg.json manifest for reproducible builds
- build.sh helper script

Configuration:
- config.json: Audio settings, API parameters, UI config
- .env: API keys (OpenAI + Anthropic)

Documentation:
- README.md: Setup instructions, usage, architecture
- docs/implementation_plan.md: Technical design document
- docs/SecondVoice.md: Project vision and motivation

Next Steps:
- Test build with vcpkg dependencies
- Test audio capture on real hardware
- Validate API integrations
- Tune chunk size for optimal latency

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent 6248fb2322
commit 5b60acaa73
.env.example (new file, 5 lines)
@@ -0,0 +1,5 @@
# OpenAI API Key (for Whisper)
OPENAI_API_KEY=sk-...

# Anthropic API Key (for Claude)
ANTHROPIC_API_KEY=sk-ant-...
.gitignore (new file, vendored, 45 lines)
@@ -0,0 +1,45 @@
# Build directories
build/
cmake-build-*/
out/

# vcpkg
vcpkg_installed/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# Compiled files
*.o
*.a
*.so
*.exe

# Environment and secrets
.env

# Recordings (generated at runtime)
recordings/*.wav
recordings/*.mp3
recordings/*.txt
recordings/*.md
recordings/*.json
!recordings/.gitkeep

# Logs
*.log

# OS
.DS_Store
Thumbs.db

# CMake
CMakeCache.txt
CMakeFiles/
cmake_install.cmake
Makefile
compile_commands.json
CMakeLists.txt (new file, 67 lines)
@@ -0,0 +1,67 @@
cmake_minimum_required(VERSION 3.20)
project(SecondVoice VERSION 0.1.0 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Find packages
find_package(portaudio CONFIG REQUIRED)
find_package(httplib CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(imgui CONFIG REQUIRED)
find_package(glfw3 CONFIG REQUIRED)
find_package(OpenGL REQUIRED)

# Source files
set(SOURCES
    src/main.cpp
    # Audio module
    src/audio/AudioCapture.cpp
    src/audio/AudioBuffer.cpp
    # API clients
    src/api/WhisperClient.cpp
    src/api/ClaudeClient.cpp
    # UI
    src/ui/TranslationUI.cpp
    # Utils
    src/utils/Config.cpp
    # Core
    src/core/Pipeline.cpp
)

# Executable
add_executable(${PROJECT_NAME} ${SOURCES})

# Include directories
target_include_directories(${PROJECT_NAME} PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

# Link libraries
target_link_libraries(${PROJECT_NAME} PRIVATE
    portaudio
    httplib::httplib
    nlohmann_json::nlohmann_json
    imgui::imgui
    glfw
    OpenGL::GL
)

# Compiler options
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
    target_compile_options(${PROJECT_NAME} PRIVATE
        -Wall
        -Wextra
        -Wpedantic
        -Werror
    )
endif()

# Copy config files to build directory
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.json
               ${CMAKE_CURRENT_BINARY_DIR}/config.json COPYONLY)

# Install target
install(TARGETS ${PROJECT_NAME} DESTINATION bin)
install(FILES config.json DESTINATION bin)
README.md (new file, 223 lines)
@@ -0,0 +1,223 @@
# SecondVoice

Real-time Chinese to French translation system for live meetings.

## Overview

SecondVoice captures audio, transcribes Chinese speech using OpenAI's Whisper API, and translates it to French using Claude AI in real time. Perfect for understanding Chinese meetings on the fly.

## Features

- 🎤 Real-time audio capture
- 🗣️ Chinese speech-to-text (Whisper API)
- 🌐 Chinese to French translation (Claude API)
- 🖥️ Clean ImGui interface
- 💾 Full recording saved to disk
- ⚙️ Configurable chunk sizes and settings

## Requirements

### System Dependencies (Linux)

```bash
# PortAudio
sudo apt install libasound2-dev

# OpenGL
sudo apt install libgl1-mesa-dev libglu1-mesa-dev
```

### vcpkg

Install vcpkg if not already installed:

```bash
git clone https://github.com/microsoft/vcpkg.git
cd vcpkg
./bootstrap-vcpkg.sh
export VCPKG_ROOT=$(pwd)
```

## Setup

1. **Clone the repository**

   ```bash
   git clone <repository-url>
   cd secondvoice
   ```

2. **Create `.env` file** (copy from `.env.example`)

   ```bash
   cp .env.example .env
   # Edit .env and add your API keys:
   # OPENAI_API_KEY=sk-...
   # ANTHROPIC_API_KEY=sk-ant-...
   ```

3. **Configure settings** (optional)

   Edit `config.json` to customize:
   - Audio chunk duration (default: 10 s)
   - Sample rate (default: 16 kHz)
   - UI window size
   - Output directory

4. **Build the project**

   ```bash
   # Configure with vcpkg
   cmake -B build -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake

   # Build
   cmake --build build -j$(nproc)
   ```

## Usage

```bash
cd build
./SecondVoice
```

The application will:
1. Open an ImGui window
2. Start capturing audio from your microphone
3. Display Chinese transcriptions and French translations in real time
4. Stop when you click the **STOP RECORDING** button
5. Save the full audio recording to `recordings/recording_YYYYMMDD_HHMMSS.wav`

## Architecture

```
Audio Capture (PortAudio)
        ↓
Whisper API (Speech-to-Text)
        ↓
Claude API (Translation)
        ↓
ImGui UI (Display)
```

### Threading Model

- **Thread 1**: Audio capture (PortAudio callback)
- **Thread 2**: AI processing (Whisper + Claude API calls)
- **Thread 3**: UI rendering (ImGui + OpenGL)
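
The threads exchange data through the `ThreadSafeQueue` template in `src/utils/ThreadSafeQueue.h`, which is not shown in this commit view. A minimal sketch, consistent with how `Pipeline.cpp` uses it (`push`, a blocking `wait_and_pop` returning `std::optional`, and `shutdown`), might look like the following; the internals are assumptions, not the actual implementation:

```cpp
#include <condition_variable>
#include <mutex>
#include <optional>
#include <queue>

// Hypothetical sketch: the real src/utils/ThreadSafeQueue.h is not part of this diff.
template <typename T>
class ThreadSafeQueue {
public:
    void push(T value) {
        {
            std::lock_guard<std::mutex> lock(mutex_);
            queue_.push(std::move(value));
        }
        cv_.notify_one();
    }

    // Blocks until an item is available or shutdown() is called.
    std::optional<T> wait_and_pop() {
        std::unique_lock<std::mutex> lock(mutex_);
        cv_.wait(lock, [this] { return !queue_.empty() || shutdown_; });
        if (queue_.empty()) {
            return std::nullopt;  // shut down with nothing left to process
        }
        T value = std::move(queue_.front());
        queue_.pop();
        return value;
    }

    // Wakes blocked consumers so worker threads can exit cleanly.
    void shutdown() {
        {
            std::lock_guard<std::mutex> lock(mutex_);
            shutdown_ = true;
        }
        cv_.notify_all();
    }

private:
    std::queue<T> queue_;
    std::mutex mutex_;
    std::condition_variable cv_;
    bool shutdown_ = false;
};
```

`Pipeline::stop()` relies on `shutdown()` to wake the processing thread out of `wait_and_pop()` so all three threads can be joined cleanly.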

## Configuration

### config.json

```json
{
  "audio": {
    "sample_rate": 16000,
    "channels": 1,
    "chunk_duration_seconds": 10
  },
  "whisper": {
    "model": "whisper-1",
    "language": "zh"
  },
  "claude": {
    "model": "claude-haiku-4-20250514",
    "max_tokens": 1024
  }
}
```

### .env

```env
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
```
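
Both files are read at startup by `Config::load("config.json", ".env")` in `src/utils/Config.cpp`. A simplified sketch of that loading step, assuming plain `KEY=VALUE` lines in `.env` and the JSON layout above (the real class stores the values on its singleton rather than returning them like this), could look like:

```cpp
#include <fstream>
#include <map>
#include <string>
#include <nlohmann/json.hpp>

// Simplified sketch of Config::load(); the actual implementation is src/utils/Config.cpp.
bool load_config(const std::string& config_path, const std::string& env_path,
                 std::map<std::string, std::string>& env_out, nlohmann::json& json_out) {
    // Parse .env: skip blank lines and comments, split each line on the first '='.
    std::ifstream env_file(env_path);
    std::string line;
    while (std::getline(env_file, line)) {
        if (line.empty() || line[0] == '#') continue;
        auto pos = line.find('=');
        if (pos == std::string::npos) continue;
        env_out[line.substr(0, pos)] = line.substr(pos + 1);
    }
    if (env_out.count("OPENAI_API_KEY") == 0 || env_out.count("ANTHROPIC_API_KEY") == 0) {
        return false;  // both keys are required by the API clients
    }

    // Parse config.json with nlohmann::json; callers read e.g. json_out["audio"]["sample_rate"].
    std::ifstream config_file(config_path);
    if (!config_file.is_open()) return false;
    config_file >> json_out;
    return true;
}
```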

## Cost Estimation

- **Whisper**: ~$0.006/minute (~$0.36/hour)
- **Claude Haiku**: ~$0.03-0.05/hour
- **Total**: ~$0.40/hour of recording

## Project Structure

```
secondvoice/
├── src/
│   ├── main.cpp        # Entry point
│   ├── audio/          # Audio capture & buffer
│   ├── api/            # Whisper & Claude clients
│   ├── ui/             # ImGui interface
│   ├── utils/          # Config & thread-safe queue
│   └── core/           # Pipeline orchestration
├── docs/               # Documentation
├── recordings/         # Output recordings
├── config.json         # Runtime configuration
├── .env                # API keys (not committed)
└── CMakeLists.txt      # Build configuration
```

## Development

### Building in Debug Mode

```bash
cmake -B build -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake
cmake --build build
```

### Running Tests

```bash
# TODO: Add tests
```

## Troubleshooting

### No audio capture

- Check microphone permissions
- Verify PortAudio is properly installed: `pa_devs` (if available)
- Try a different audio device in code

### API errors

- Verify API keys in `.env` are correct
- Check internet connection
- Monitor API rate limits

### Build errors

- Ensure vcpkg is properly set up
- Check all system dependencies are installed
- Try `cmake --build build --clean-first`

## Roadmap

### Phase 1 - MVP (Current)
- ✅ Audio capture
- ✅ Whisper integration
- ✅ Claude integration
- ✅ ImGui UI
- ✅ Stop button

### Phase 2 - Enhancement
- ⬜ Auto-summary post-meeting
- ⬜ Export transcripts
- ⬜ Search functionality
- ⬜ Speaker diarization
- ⬜ Replay mode

## License

See LICENSE file.

## Contributing

This is a personal project, but suggestions and bug reports are welcome via issues.

## Contact

See docs/SecondVoice.md for project context and motivation.
build.sh (new file, 50 lines)
@@ -0,0 +1,50 @@
#!/bin/bash
# Build script for SecondVoice

set -e

# Colors
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo "SecondVoice Build Script"
echo "======================="
echo ""

# Check if vcpkg is set
if [ -z "$VCPKG_ROOT" ]; then
    echo -e "${RED}Error: VCPKG_ROOT not set${NC}"
    echo "Please install vcpkg and set VCPKG_ROOT environment variable:"
    echo "  git clone https://github.com/microsoft/vcpkg.git"
    echo "  cd vcpkg && ./bootstrap-vcpkg.sh"
    echo "  export VCPKG_ROOT=\$(pwd)"
    exit 1
fi

echo -e "${GREEN}vcpkg found at: $VCPKG_ROOT${NC}"
echo ""

# Check if .env exists
if [ ! -f ".env" ]; then
    echo -e "${RED}Warning: .env file not found${NC}"
    echo "Please create .env from .env.example and add your API keys"
    echo "  cp .env.example .env"
    echo ""
fi

# Configure
echo "Configuring CMake..."
cmake -B build -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake

# Build
echo ""
echo "Building..."
cmake --build build -j$(nproc)

echo ""
echo -e "${GREEN}Build successful!${NC}"
echo ""
echo "To run the application:"
echo "  cd build"
echo "  ./SecondVoice"
config.json (new file, 29 lines)
@@ -0,0 +1,29 @@
{
  "audio": {
    "sample_rate": 16000,
    "channels": 1,
    "chunk_duration_seconds": 10,
    "format": "wav"
  },
  "whisper": {
    "model": "whisper-1",
    "language": "zh",
    "temperature": 0.0
  },
  "claude": {
    "model": "claude-haiku-4-20250514",
    "max_tokens": 1024,
    "temperature": 0.3,
    "system_prompt": "Tu es un traducteur professionnel chinois-français. Traduis le texte suivant de manière naturelle et contextuelle."
  },
  "ui": {
    "window_width": 800,
    "window_height": 600,
    "font_size": 16,
    "max_display_lines": 50
  },
  "recording": {
    "save_audio": true,
    "output_directory": "./recordings"
  }
}
docs/implementation_plan.md (new file, 494 lines)
@@ -0,0 +1,494 @@
# SecondVoice - MVP Implementation Plan

**Date**: 20 November 2025
**Target**: Minimal working MVP
**Platform**: Linux
**Package Manager**: vcpkg

---

## 🎯 Minimal MVP Goal

Desktop application that:
1. Continuously captures microphone audio
2. Transcribes Chinese → text (Whisper API)
3. Translates text → French (Claude API)
4. Displays the translation in real time (ImGui)
5. Stop button to end the session (no summary in the MVP)

---

## 🏗️ Technical Architecture

### Pipeline
```
Audio Capture (PortAudio)
        ↓ (configurable audio chunks)
Whisper API (STT)
        ↓ (Chinese text)
Claude API (translation)
        ↓ (French text)
ImGui UI (real-time display + Stop button)
```

### Threading Model
```
Thread 1 - Audio Capture:
- PortAudio callback captures audio
- Accumulates chunks (configurable size)
- Pushes into a thread-safe queue
- Saves WAV backup in the background

Thread 2 - AI Processing:
- Pops a chunk from the audio queue
- POST to Whisper API → Chinese transcription
- POST to Claude API → French translation
- Pushes the result into the UI queue

Thread 3 - Main UI (ImGui):
- Renders the ImGui window
- Displays translations from the queue
- Handles the Stop button
- Updates status/duration
```

---

## 📁 Project Structure

```
secondvoice/
├── .env                       # API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY)
├── .gitignore
├── CMakeLists.txt             # Build configuration
├── vcpkg.json                 # Dependencies manifest
├── config.json                # Runtime config (audio chunk size, etc.)
├── README.md
├── docs/
│   ├── SecondVoice.md         # Vision document
│   └── implementation_plan.md # This document
├── src/
│   ├── main.cpp               # Entry point + ImGui main loop
│   ├── audio/
│   │   ├── AudioCapture.h
│   │   ├── AudioCapture.cpp   # PortAudio wrapper
│   │   ├── AudioBuffer.h
│   │   └── AudioBuffer.cpp    # Thread-safe ring buffer
│   ├── api/
│   │   ├── WhisperClient.h
│   │   ├── WhisperClient.cpp  # Whisper API client
│   │   ├── ClaudeClient.h
│   │   └── ClaudeClient.cpp   # Claude API client
│   ├── ui/
│   │   ├── TranslationUI.h
│   │   └── TranslationUI.cpp  # ImGui interface
│   ├── utils/
│   │   ├── Config.h
│   │   ├── Config.cpp         # Load .env + config.json
│   │   ├── ThreadSafeQueue.h  # Thread-safe queue template
│   │   └── Logger.h           # Simple logging
│   └── core/
│       ├── Pipeline.h
│       └── Pipeline.cpp       # Orchestrates the threads
├── recordings/                # Output audio files
│   └── .gitkeep
└── build/                     # CMake build output (ignored)
```

---

## 🔧 Dependencies

### vcpkg.json
```json
{
  "name": "secondvoice",
  "version": "0.1.0",
  "dependencies": [
    "portaudio",
    "cpp-httplib",
    "nlohmann-json",
    "imgui[glfw-binding,opengl3-binding]",
    "glfw3",
    "opengl"
  ]
}
```

### System Requirements (Linux)
```bash
# PortAudio dependencies
sudo apt install libasound2-dev

# OpenGL dependencies
sudo apt install libgl1-mesa-dev libglu1-mesa-dev
```

---

## ⚙️ Configuration

### .env (project root)
```env
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
```

### config.json (project root)
```json
{
  "audio": {
    "sample_rate": 16000,
    "channels": 1,
    "chunk_duration_seconds": 10,
    "format": "wav"
  },
  "whisper": {
    "model": "whisper-1",
    "language": "zh",
    "temperature": 0.0
  },
  "claude": {
    "model": "claude-haiku-4-20250514",
    "max_tokens": 1024,
    "temperature": 0.3,
    "system_prompt": "Tu es un traducteur professionnel chinois-français. Traduis le texte suivant de manière naturelle et contextuelle."
  },
  "ui": {
    "window_width": 800,
    "window_height": 600,
    "font_size": 16,
    "max_display_lines": 50
  },
  "recording": {
    "save_audio": true,
    "output_directory": "./recordings"
  }
}
```

---

## 🔌 API Clients

### Whisper API
```cpp
// POST https://api.openai.com/v1/audio/transcriptions
// Content-Type: multipart/form-data

Request:
- file: audio.wav (binary)
- model: whisper-1
- language: zh
- temperature: 0.0

Response:
{
  "text": "你好，今天我们讨论项目进度..."
}
```

### Claude API
```cpp
// POST https://api.anthropic.com/v1/messages
// Content-Type: application/json
// x-api-key: {ANTHROPIC_API_KEY}
// anthropic-version: 2023-06-01

Request:
{
  "model": "claude-haiku-4-20250514",
  "max_tokens": 1024,
  "messages": [{
    "role": "user",
    "content": "Traduis en français: 你好，今天我们讨论项目进度..."
  }]
}

Response:
{
  "content": [{
    "type": "text",
    "text": "Bonjour, aujourd'hui nous discutons de l'avancement du projet..."
  }],
  "model": "claude-haiku-4-20250514",
  "usage": {...}
}
```

---

## 🎨 ImGui Interface

### Minimalist Layout
```
┌────────────────────────────────────────────┐
│ SecondVoice - Live Translation             │
├────────────────────────────────────────────┤
│                                            │
│ [●] Recording...   Duration: 00:05:23      │
│                                            │
│ ┌────────────────────────────────────────┐ │
│ │ 中文: 你好，今天我们讨论项目进度...     │ │
│ │ FR: Bonjour, aujourd'hui nous          │ │
│ │     discutons de l'avancement...       │ │
│ │                                        │ │
│ │ 中文: 关于预算的问题...                 │ │
│ │ FR: Concernant la question du budget.. │ │
│ │                                        │ │
│ │ [Auto-scroll enabled]                  │ │
│ │                                        │ │
│ └────────────────────────────────────────┘ │
│                                            │
│           [ STOP RECORDING ]               │
│                                            │
│ Status: Processing chunk 12/12             │
│ Audio: 16kHz mono, chunk size: 10s         │
└────────────────────────────────────────────┘
```

### UI Features
- **Scrollable text area**: auto-scroll, can be disabled for review
- **Color coding**: Chinese (color 1), French (color 2)
- **Status bar**: duration, chunk count, processing status
- **Stop button**: stops capture + processing, saves the audio
- **Resizable window**: adaptive layout

---

## 🚀 Implementation Order

### Phase 1 - Infrastructure Setup (Day 1)
**Todo**:
1. ✅ Create project structure
2. ✅ Set up CMakeLists.txt with vcpkg
3. ✅ Create .gitignore (.env, build/, recordings/)
4. ✅ Create config.json template
5. ✅ Set up .env (API keys)
6. ✅ Minimal test build (hello world)

**Validation**: `cmake -B build && cmake --build build` compiles without errors

---

### Phase 2 - Audio Capture (Day 1-2)
**Todo**:
1. Implement `AudioCapture.h/cpp`:
   - Init PortAudio
   - Audio capture callback
   - Chunk accumulation (configurable duration)
   - Push into ThreadSafeQueue
2. Implement `AudioBuffer.h/cpp`:
   - Ring buffer for raw audio
   - Thread-safe operations
3. Standalone test: capture 30 s of audio → save WAV

**Validation**: WAV audio is playable, duration correct, quality OK

---

### Phase 3 - Whisper Client (Day 2)
**Todo**:
1. Implement `WhisperClient.h/cpp`:
   - Load API key from .env
   - POST multipart/form-data (cpp-httplib)
   - Encode audio WAV in memory
   - Parse JSON response
   - Error handling (retry, timeout)
2. Standalone test: audio file → Whisper → Chinese text

**Validation**: Correct Chinese transcription on a sample audio

---

### Phase 4 - Claude Client (Day 2-3)
**Todo**:
1. Implement `ClaudeClient.h/cpp`:
   - Load API key from .env
   - POST JSON request (cpp-httplib)
   - Configurable system prompt
   - Parse response (extract text)
   - Error handling
2. Standalone test: Chinese text → Claude → French text

**Validation**: Natural, correct French translation

---

### Phase 5 - ImGui UI (Day 3)
**Todo**:
1. Set up ImGui + GLFW + OpenGL:
   - Window creation
   - Render loop
   - Input handling
2. Implement `TranslationUI.h/cpp`:
   - Scrollable text area
   - Display messages (CN + FR)
   - Stop button
   - Status bar (duration, chunk count)
3. Standalone test: display mock data

**Validation**: UI responsive, text display OK, button works

---

### Phase 6 - Pipeline Integration (Day 4)
**Todo**:
1. Implement `Pipeline.h/cpp`:
   - Thread 1: AudioCapture loop
   - Thread 2: Processing loop (Whisper → Claude)
   - Thread 3: UI loop (ImGui)
   - ThreadSafeQueue between threads
   - Synchronization (start/stop)
2. Implement `Config.h/cpp`:
   - Load .env (API keys)
   - Load config.json (settings)
3. Implement `main.cpp`:
   - Init all components
   - Start pipeline
   - Handle graceful shutdown

**Validation**: Complete pipeline works end to end

---

### Phase 7 - Testing & Tuning (Day 5)
**Todo**:
1. Test with real Chinese audio:
   - Sample conversations
   - Different audio qualities
   - Different chunk sizes (5s, 10s, 30s)
2. Measure latency:
   - Audio → Whisper: X seconds
   - Whisper → Claude: Y seconds
   - Total: Z seconds
3. Debug & fix bugs:
   - Memory leaks
   - Thread safety issues
   - API error handling
4. Optimize:
   - Optimal chunk size (latency vs. accuracy trade-off)
   - API timeout values
   - UI refresh rate

**Validation**:
- Total latency < 10 s is acceptable
- No crash over a 30-minute recording
- Transcription + translation are understandable

---

## 🧪 Test Plan

### Unit Tests (Phase 2+)
- `AudioCapture`: captures audio, correct format
- `WhisperClient`: mocked API call, JSON parsing
- `ClaudeClient`: mocked API call, JSON parsing
- `ThreadSafeQueue`: thread safety, no data loss

### Integration Tests
- Audio → Whisper: audio file → correct Chinese text
- Whisper → Claude: Chinese text → correct French translation
- Pipeline: audio → complete UI display

### End-to-End Test
- Record a 5-minute real Chinese conversation
- Verify transcription accuracy (>85%)
- Verify the translation is understandable
- Verify the UI stays responsive
- Verify the audio is saved correctly

---

## 📊 Metrics to Track

### Performance
- **Whisper latency**: API call time (target: <3 s for 10 s of audio)
- **Claude latency**: API call time (target: <2 s for 200 tokens)
- **Total latency**: audio → display (target: <10 s)
- **Memory usage**: stable over long sessions (no leaks)
- **CPU usage**: acceptable (<50% on a laptop)

### Quality
- **Whisper accuracy**: % of correct words (target: >85%)
- **Claude quality**: natural translation (subjective)
- **Crash rate**: 0 crashes over a 1-hour recording

### Cost
- **Whisper**: $0.006/min of audio
- **Claude**: ~$0.03-0.05/h (depends on text volume)
- **Total**: ~$0.40/h of meeting

---

## ⚠️ Risks & Mitigations

| Risk | Impact | Mitigation |
|------|--------|------------|
| **Whisper API timeout** | Blocking | Retry logic, 30 s timeout, fallback queue |
| **Claude API rate limit** | Medium | Exponential backoff, queue requests |
| **Audio buffer overflow** | Medium | Adequate ring buffer size, drop old chunks if needed |
| **Thread deadlock** | Blocking | Use std::lock_guard, avoid nested locks |
| **Memory leak** | Medium | Use smart pointers, valgrind tests |
| **Network interruption** | Medium | Retry logic, cache audio locally |

---

## 🎯 MVP Success Criteria

✅ **The MVP is validated if**:
1. Microphone audio capture works
2. Chinese transcription is >85% accurate
3. French translation is understandable
4. The UI displays translations in real time
5. The Stop button shuts everything down cleanly
6. The audio is saved correctly
7. No crash over a 30-minute recording
8. Total latency <10 s is acceptable

---

## 📝 Implementation Notes

### Thread Safety
- Use `std::mutex` + `std::lock_guard` for the queues
- No shared state without protection
- Use `std::atomic<bool>` for flags (running, stopping)

### Error Handling
- Try/catch around API calls
- Log errors (spdlog or plain cout)
- Retry logic (max 3 attempts)
- Graceful degradation (skip the chunk if the error persists)

### Audio Format
- **Sample rate**: 16 kHz (optimal for Whisper)
- **Channels**: mono (sufficient, reduces bandwidth)
- **Format**: 16-bit PCM WAV
- **Chunk size**: configurable (default 10 s); see the sizing note below
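
For scale, the chunk sizes these defaults imply (illustrative constants; `AudioCapture::audioCallback` computes the same `sample_rate * channels * chunk_duration` product at runtime):

```cpp
#include <cstddef>

// Samples per chunk at the defaults above: 16 kHz, mono, 10 s chunks.
constexpr std::size_t kSampleRate   = 16000;
constexpr std::size_t kChannels     = 1;
constexpr std::size_t kChunkSeconds = 10;

constexpr std::size_t kSamplesPerChunk = kSampleRate * kChannels * kChunkSeconds; // 160,000 samples
constexpr std::size_t kWavBytesPerChunk = kSamplesPerChunk * 2;                   // ~320 KB as 16-bit PCM
```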

### API Best Practices
- **Timeout**: 30 s for Whisper, 15 s for Claude
- **Retry**: exponential backoff (1 s, 2 s, 4 s); see the sketch after this list
- **Rate limiting**: respect API limits (monitor 429 errors)
- **Headers**: always set User-Agent and the API version
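
A sketch of the retry idea from the bullets above, wrapped around a generic API call; the helper name and structure are illustrative and not part of this commit:

```cpp
#include <chrono>
#include <functional>
#include <optional>
#include <string>
#include <thread>

// Hypothetical helper: one initial attempt plus up to 3 retries with 1 s / 2 s / 4 s backoff.
std::optional<std::string> call_with_retry(const std::function<std::optional<std::string>()>& call) {
    using namespace std::chrono_literals;
    auto delay = 1s;
    for (int attempt = 0; attempt < 4; ++attempt) {
        if (auto result = call()) {
            return result;                    // success
        }
        if (attempt == 3) break;              // retries exhausted
        std::this_thread::sleep_for(delay);   // back off before the next attempt
        delay *= 2;                           // 1 s → 2 s → 4 s
    }
    return std::nullopt;  // caller skips this chunk (graceful degradation)
}
```

In the processing thread this would wrap the `WhisperClient::transcribe` and `ClaudeClient::translate` calls.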

---

## 🔄 Post-MVP (Phase 2)

**Not included in the MVP, but planned**:
- ❌ Automatic post-meeting summary (Claude summary)
- ❌ Structured export (transcripts + audio)
- ❌ Search system (backlog)
- ❌ Diarization (who is speaking)
- ❌ Replay mode
- ❌ Richer GUI (settings, etc.)

**MVP focus**: a working end-to-end pipeline, concept validation, real use in a first meeting.

---

*Document created: 20 November 2025*
*Status: Ready to implement*
*Estimated effort: 5 days of development + 2 days of testing*
recordings/.gitkeep (new empty file)
src/api/ClaudeClient.cpp (new file, 79 lines)
@@ -0,0 +1,79 @@
|
||||
#include "ClaudeClient.h"
|
||||
#include <httplib.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <iostream>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
ClaudeClient::ClaudeClient(const std::string& api_key)
|
||||
: api_key_(api_key) {
|
||||
}
|
||||
|
||||
std::optional<ClaudeResponse> ClaudeClient::translate(
|
||||
const std::string& chinese_text,
|
||||
const std::string& system_prompt,
|
||||
int max_tokens,
|
||||
float temperature) {
|
||||
|
||||
// Build request JSON
|
||||
json request_json = {
|
||||
{"model", MODEL},
|
||||
{"max_tokens", max_tokens},
|
||||
{"temperature", temperature},
|
||||
{"messages", json::array({
|
||||
{
|
||||
{"role", "user"},
|
||||
{"content", "Traduis en français: " + chinese_text}
|
||||
}
|
||||
})}
|
||||
};
|
||||
|
||||
if (!system_prompt.empty()) {
|
||||
request_json["system"] = system_prompt;
|
||||
}
|
||||
|
||||
std::string request_body = request_json.dump();
|
||||
|
||||
// Make HTTP request
|
||||
httplib::Client client("https://api.anthropic.com");
|
||||
client.set_read_timeout(15, 0); // 15 seconds timeout
|
||||
|
||||
httplib::Headers headers = {
|
||||
{"x-api-key", api_key_},
|
||||
{"anthropic-version", API_VERSION},
|
||||
{"content-type", "application/json"}
|
||||
};
|
||||
|
||||
auto res = client.Post("/v1/messages", headers, request_body, "application/json");
|
||||
|
||||
if (!res) {
|
||||
std::cerr << "Claude API request failed: " << httplib::to_string(res.error()) << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (res->status != 200) {
|
||||
std::cerr << "Claude API error " << res->status << ": " << res->body << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Parse response
|
||||
try {
|
||||
json response_json = json::parse(res->body);
|
||||
|
||||
if (!response_json.contains("content") || !response_json["content"].is_array()) {
|
||||
std::cerr << "Invalid Claude API response format" << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
ClaudeResponse response;
|
||||
response.text = response_json["content"][0]["text"].get<std::string>();
|
||||
return response;
|
||||
} catch (const json::exception& e) {
|
||||
std::cerr << "Failed to parse Claude response: " << e.what() << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace secondvoice
|
||||
src/api/ClaudeClient.h (new file, 30 lines)
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <optional>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
struct ClaudeResponse {
|
||||
std::string text;
|
||||
};
|
||||
|
||||
class ClaudeClient {
|
||||
public:
|
||||
ClaudeClient(const std::string& api_key);
|
||||
|
||||
std::optional<ClaudeResponse> translate(
|
||||
const std::string& chinese_text,
|
||||
const std::string& system_prompt = "",
|
||||
int max_tokens = 1024,
|
||||
float temperature = 0.3f
|
||||
);
|
||||
|
||||
private:
|
||||
std::string api_key_;
|
||||
static constexpr const char* API_URL = "https://api.anthropic.com/v1/messages";
|
||||
static constexpr const char* MODEL = "claude-haiku-4-20250514";
|
||||
static constexpr const char* API_VERSION = "2023-06-01";
|
||||
};
|
||||
|
||||
} // namespace secondvoice
|
||||
src/api/WhisperClient.cpp (new file, 85 lines)
@@ -0,0 +1,85 @@
|
||||
#include "WhisperClient.h"
|
||||
#include "../audio/AudioBuffer.h"
|
||||
#include <httplib.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
WhisperClient::WhisperClient(const std::string& api_key)
|
||||
: api_key_(api_key) {
|
||||
}
|
||||
|
||||
std::optional<WhisperResponse> WhisperClient::transcribe(
|
||||
const std::vector<float>& audio_data,
|
||||
int sample_rate,
|
||||
int channels,
|
||||
const std::string& language,
|
||||
float temperature) {
|
||||
|
||||
// Save audio to temporary WAV file
|
||||
AudioBuffer buffer(sample_rate, channels);
|
||||
buffer.addSamples(audio_data);
|
||||
|
||||
std::string temp_file = "/tmp/secondvoice_temp.wav";
|
||||
if (!buffer.saveToWav(temp_file)) {
|
||||
std::cerr << "Failed to save temporary WAV file" << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Read WAV file
|
||||
std::ifstream file(temp_file, std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Failed to open temporary WAV file" << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::ostringstream wav_stream;
|
||||
wav_stream << file.rdbuf();
|
||||
std::string wav_data = wav_stream.str();
|
||||
file.close();
|
||||
|
||||
// Make HTTP request
|
||||
httplib::Client client("https://api.openai.com");
|
||||
client.set_read_timeout(30, 0); // 30 seconds timeout
|
||||
|
||||
httplib::MultipartFormDataItems items = {
|
||||
{"file", wav_data, "audio.wav", "audio/wav"},
|
||||
{"model", "whisper-1", "", ""},
|
||||
{"language", language, "", ""},
|
||||
{"temperature", std::to_string(temperature), "", ""}
|
||||
};
|
||||
|
||||
httplib::Headers headers = {
|
||||
{"Authorization", "Bearer " + api_key_}
|
||||
};
|
||||
|
||||
auto res = client.Post("/v1/audio/transcriptions", headers, items);
|
||||
|
||||
if (!res) {
|
||||
std::cerr << "Whisper API request failed: " << httplib::to_string(res.error()) << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (res->status != 200) {
|
||||
std::cerr << "Whisper API error " << res->status << ": " << res->body << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Parse response
|
||||
try {
|
||||
json response_json = json::parse(res->body);
|
||||
WhisperResponse response;
|
||||
response.text = response_json["text"].get<std::string>();
|
||||
return response;
|
||||
} catch (const json::exception& e) {
|
||||
std::cerr << "Failed to parse Whisper response: " << e.what() << std::endl;
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace secondvoice
|
||||
src/api/WhisperClient.h (new file, 30 lines)
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
struct WhisperResponse {
|
||||
std::string text;
|
||||
};
|
||||
|
||||
class WhisperClient {
|
||||
public:
|
||||
WhisperClient(const std::string& api_key);
|
||||
|
||||
std::optional<WhisperResponse> transcribe(
|
||||
const std::vector<float>& audio_data,
|
||||
int sample_rate,
|
||||
int channels,
|
||||
const std::string& language = "zh",
|
||||
float temperature = 0.0f
|
||||
);
|
||||
|
||||
private:
|
||||
std::string api_key_;
|
||||
static constexpr const char* API_URL = "https://api.openai.com/v1/audio/transcriptions";
|
||||
};
|
||||
|
||||
} // namespace secondvoice
|
||||
src/audio/AudioBuffer.cpp (new file, 76 lines)
@@ -0,0 +1,76 @@
|
||||
#include "AudioBuffer.h"
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
AudioBuffer::AudioBuffer(int sample_rate, int channels)
|
||||
: sample_rate_(sample_rate)
|
||||
, channels_(channels) {
|
||||
}
|
||||
|
||||
void AudioBuffer::addSamples(const std::vector<float>& samples) {
|
||||
samples_.insert(samples_.end(), samples.begin(), samples.end());
|
||||
}
|
||||
|
||||
std::vector<float> AudioBuffer::getSamples() const {
|
||||
return samples_;
|
||||
}
|
||||
|
||||
void AudioBuffer::clear() {
|
||||
samples_.clear();
|
||||
}
|
||||
|
||||
bool AudioBuffer::saveToWav(const std::string& filename) const {
|
||||
std::ofstream file(filename, std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// WAV header
|
||||
struct WavHeader {
|
||||
char riff[4] = {'R', 'I', 'F', 'F'};
|
||||
uint32_t file_size;
|
||||
char wave[4] = {'W', 'A', 'V', 'E'};
|
||||
char fmt[4] = {'f', 'm', 't', ' '};
|
||||
uint32_t fmt_size = 16;
|
||||
uint16_t audio_format = 1; // PCM
|
||||
uint16_t num_channels;
|
||||
uint32_t sample_rate;
|
||||
uint32_t byte_rate;
|
||||
uint16_t block_align;
|
||||
uint16_t bits_per_sample = 16;
|
||||
char data[4] = {'d', 'a', 't', 'a'};
|
||||
uint32_t data_size;
|
||||
};
|
||||
|
||||
WavHeader header;
|
||||
header.num_channels = channels_;
|
||||
header.sample_rate = sample_rate_;
|
||||
header.byte_rate = sample_rate_ * channels_ * 2; // 16-bit = 2 bytes
|
||||
header.block_align = channels_ * 2;
|
||||
|
||||
// Convert float samples to 16-bit PCM
|
||||
std::vector<int16_t> pcm_samples;
|
||||
pcm_samples.reserve(samples_.size());
|
||||
for (float sample : samples_) {
|
||||
// Clamp to [-1.0, 1.0] and convert to 16-bit
|
||||
float clamped = std::max(-1.0f, std::min(1.0f, sample));
|
||||
pcm_samples.push_back(static_cast<int16_t>(clamped * 32767.0f));
|
||||
}
|
||||
|
||||
header.data_size = pcm_samples.size() * sizeof(int16_t);
|
||||
header.file_size = sizeof(WavHeader) - 8 + header.data_size;
|
||||
|
||||
// Write header
|
||||
file.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
|
||||
// Write PCM data
|
||||
file.write(reinterpret_cast<const char*>(pcm_samples.data()), header.data_size);
|
||||
|
||||
file.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace secondvoice
|
||||
src/audio/AudioBuffer.h (new file, 27 lines)
@@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
class AudioBuffer {
|
||||
public:
|
||||
AudioBuffer(int sample_rate, int channels);
|
||||
|
||||
void addSamples(const std::vector<float>& samples);
|
||||
std::vector<float> getSamples() const;
|
||||
void clear();
|
||||
|
||||
bool saveToWav(const std::string& filename) const;
|
||||
|
||||
size_t size() const { return samples_.size(); }
|
||||
bool empty() const { return samples_.empty(); }
|
||||
|
||||
private:
|
||||
int sample_rate_;
|
||||
int channels_;
|
||||
std::vector<float> samples_;
|
||||
};
|
||||
|
||||
} // namespace secondvoice
|
||||
src/audio/AudioCapture.cpp (new file, 110 lines)
@@ -0,0 +1,110 @@
|
||||
#include "AudioCapture.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
AudioCapture::AudioCapture(int sample_rate, int channels, int chunk_duration_seconds)
|
||||
: sample_rate_(sample_rate)
|
||||
, channels_(channels)
|
||||
, chunk_duration_seconds_(chunk_duration_seconds) {
|
||||
}
|
||||
|
||||
AudioCapture::~AudioCapture() {
|
||||
stop();
|
||||
if (stream_) {
|
||||
Pa_CloseStream(stream_);
|
||||
}
|
||||
Pa_Terminate();
|
||||
}
|
||||
|
||||
bool AudioCapture::initialize() {
|
||||
PaError err = Pa_Initialize();
|
||||
if (err != paNoError) {
|
||||
std::cerr << "PortAudio init error: " << Pa_GetErrorText(err) << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int AudioCapture::audioCallback(const void* input, void* output,
|
||||
unsigned long frame_count,
|
||||
const PaStreamCallbackTimeInfo* time_info,
|
||||
PaStreamCallbackFlags status_flags,
|
||||
void* user_data) {
|
||||
AudioCapture* self = static_cast<AudioCapture*>(user_data);
|
||||
const float* in = static_cast<const float*>(input);
|
||||
|
||||
// Accumulate audio data
|
||||
for (unsigned long i = 0; i < frame_count * self->channels_; ++i) {
|
||||
self->buffer_.push_back(in[i]);
|
||||
}
|
||||
|
||||
// Check if we have accumulated enough data for a chunk
|
||||
size_t chunk_samples = self->sample_rate_ * self->channels_ * self->chunk_duration_seconds_;
|
||||
if (self->buffer_.size() >= chunk_samples) {
|
||||
// Call the callback with the chunk
|
||||
if (self->callback_) {
|
||||
self->callback_(self->buffer_);
|
||||
}
|
||||
self->buffer_.clear();
|
||||
}
|
||||
|
||||
return paContinue;
|
||||
}
|
||||
|
||||
bool AudioCapture::start(AudioCallback callback) {
|
||||
if (is_recording_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
callback_ = callback;
|
||||
buffer_.clear();
|
||||
|
||||
PaStreamParameters input_params;
|
||||
input_params.device = Pa_GetDefaultInputDevice();
|
||||
if (input_params.device == paNoDevice) {
|
||||
std::cerr << "No default input device" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
input_params.channelCount = channels_;
|
||||
input_params.sampleFormat = paFloat32;
|
||||
input_params.suggestedLatency = Pa_GetDeviceInfo(input_params.device)->defaultLowInputLatency;
|
||||
input_params.hostApiSpecificStreamInfo = nullptr;
|
||||
|
||||
PaError err = Pa_OpenStream(
|
||||
&stream_,
|
||||
&input_params,
|
||||
nullptr, // no output
|
||||
sample_rate_,
|
||||
paFramesPerBufferUnspecified,
|
||||
paClipOff,
|
||||
&AudioCapture::audioCallback,
|
||||
this
|
||||
);
|
||||
|
||||
if (err != paNoError) {
|
||||
std::cerr << "PortAudio open stream error: " << Pa_GetErrorText(err) << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
err = Pa_StartStream(stream_);
|
||||
if (err != paNoError) {
|
||||
std::cerr << "PortAudio start stream error: " << Pa_GetErrorText(err) << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
is_recording_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void AudioCapture::stop() {
|
||||
if (!is_recording_ || !stream_) {
|
||||
return;
|
||||
}
|
||||
|
||||
Pa_StopStream(stream_);
|
||||
is_recording_ = false;
|
||||
}
|
||||
|
||||
} // namespace secondvoice
|
||||
src/audio/AudioCapture.h (new file, 39 lines)
@@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <portaudio.h>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
class AudioCapture {
|
||||
public:
|
||||
using AudioCallback = std::function<void(const std::vector<float>&)>;
|
||||
|
||||
AudioCapture(int sample_rate, int channels, int chunk_duration_seconds);
|
||||
~AudioCapture();
|
||||
|
||||
bool initialize();
|
||||
bool start(AudioCallback callback);
|
||||
void stop();
|
||||
bool isRecording() const { return is_recording_; }
|
||||
|
||||
private:
|
||||
static int audioCallback(const void* input, void* output,
|
||||
unsigned long frame_count,
|
||||
const PaStreamCallbackTimeInfo* time_info,
|
||||
PaStreamCallbackFlags status_flags,
|
||||
void* user_data);
|
||||
|
||||
int sample_rate_;
|
||||
int channels_;
|
||||
int chunk_duration_seconds_;
|
||||
bool is_recording_ = false;
|
||||
|
||||
PaStream* stream_ = nullptr;
|
||||
AudioCallback callback_;
|
||||
std::vector<float> buffer_;
|
||||
};
|
||||
|
||||
} // namespace secondvoice
|
||||
src/core/Pipeline.cpp (new file, 214 lines)
@@ -0,0 +1,214 @@
|
||||
#include "Pipeline.h"
|
||||
#include "../audio/AudioCapture.h"
|
||||
#include "../audio/AudioBuffer.h"
|
||||
#include "../api/WhisperClient.h"
|
||||
#include "../api/ClaudeClient.h"
|
||||
#include "../ui/TranslationUI.h"
|
||||
#include "../utils/Config.h"
|
||||
#include <iostream>
#include <chrono>
#include <filesystem>
#include <iomanip>
#include <sstream>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
Pipeline::Pipeline() = default;
|
||||
|
||||
Pipeline::~Pipeline() {
|
||||
stop();
|
||||
}
|
||||
|
||||
bool Pipeline::initialize() {
|
||||
auto& config = Config::getInstance();
|
||||
|
||||
// Initialize audio capture
|
||||
audio_capture_ = std::make_unique<AudioCapture>(
|
||||
config.getAudioConfig().sample_rate,
|
||||
config.getAudioConfig().channels,
|
||||
config.getAudioConfig().chunk_duration_seconds
|
||||
);
|
||||
|
||||
if (!audio_capture_->initialize()) {
|
||||
std::cerr << "Failed to initialize audio capture" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initialize API clients
|
||||
whisper_client_ = std::make_unique<WhisperClient>(config.getOpenAIKey());
|
||||
claude_client_ = std::make_unique<ClaudeClient>(config.getAnthropicKey());
|
||||
|
||||
// Initialize UI
|
||||
ui_ = std::make_unique<TranslationUI>(
|
||||
config.getUIConfig().window_width,
|
||||
config.getUIConfig().window_height
|
||||
);
|
||||
|
||||
if (!ui_->initialize()) {
|
||||
std::cerr << "Failed to initialize UI" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initialize full recording buffer
|
||||
full_recording_ = std::make_unique<AudioBuffer>(
|
||||
config.getAudioConfig().sample_rate,
|
||||
config.getAudioConfig().channels
|
||||
);
|
||||
|
||||
// Create recordings directory if it doesn't exist
|
||||
std::filesystem::create_directories(config.getRecordingConfig().output_directory);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Pipeline::start() {
|
||||
if (running_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
running_ = true;
|
||||
|
||||
// Start threads
|
||||
audio_thread_ = std::thread(&Pipeline::audioThread, this);
|
||||
processing_thread_ = std::thread(&Pipeline::processingThread, this);
|
||||
ui_thread_ = std::thread(&Pipeline::uiThread, this);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Pipeline::stop() {
|
||||
if (!running_) {
|
||||
return;
|
||||
}
|
||||
|
||||
running_ = false;
|
||||
|
||||
// Stop audio capture
|
||||
if (audio_capture_) {
|
||||
audio_capture_->stop();
|
||||
}
|
||||
|
||||
// Shutdown queues
|
||||
audio_queue_.shutdown();
|
||||
transcription_queue_.shutdown();
|
||||
|
||||
// Wait for threads
|
||||
if (audio_thread_.joinable()) {
|
||||
audio_thread_.join();
|
||||
}
|
||||
if (processing_thread_.joinable()) {
|
||||
processing_thread_.join();
|
||||
}
|
||||
if (ui_thread_.joinable()) {
|
||||
ui_thread_.join();
|
||||
}
|
||||
|
||||
// Save full recording
|
||||
auto& config = Config::getInstance();
|
||||
if (config.getRecordingConfig().save_audio && full_recording_) {
|
||||
auto now = std::chrono::system_clock::now();
|
||||
auto time_t = std::chrono::system_clock::to_time_t(now);
|
||||
std::stringstream ss;
|
||||
ss << config.getRecordingConfig().output_directory << "/"
|
||||
<< "recording_" << std::put_time(std::localtime(&time_t), "%Y%m%d_%H%M%S")
|
||||
<< ".wav";
|
||||
|
||||
if (full_recording_->saveToWav(ss.str())) {
|
||||
std::cout << "Recording saved to: " << ss.str() << std::endl;
|
||||
} else {
|
||||
std::cerr << "Failed to save recording" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Pipeline::audioThread() {
|
||||
auto& config = Config::getInstance();
|
||||
|
||||
audio_capture_->start([this, &config](const std::vector<float>& audio_data) {
|
||||
if (!running_) return;
|
||||
|
||||
// Add to full recording
|
||||
full_recording_->addSamples(audio_data);
|
||||
|
||||
// Push to processing queue
|
||||
AudioChunk chunk;
|
||||
chunk.data = audio_data;
|
||||
chunk.sample_rate = config.getAudioConfig().sample_rate;
|
||||
chunk.channels = config.getAudioConfig().channels;
|
||||
|
||||
audio_queue_.push(std::move(chunk));
|
||||
});
|
||||
|
||||
// Keep thread alive while recording
|
||||
auto start_time = std::chrono::steady_clock::now();
|
||||
while (running_ && audio_capture_->isRecording()) {
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
|
||||
// Update duration
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
recording_duration_ = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count();
|
||||
}
|
||||
}
|
||||
|
||||
void Pipeline::processingThread() {
|
||||
auto& config = Config::getInstance();
|
||||
|
||||
while (running_) {
|
||||
auto chunk_opt = audio_queue_.wait_and_pop();
|
||||
if (!chunk_opt.has_value()) {
|
||||
break; // Queue shutdown
|
||||
}
|
||||
|
||||
auto& chunk = chunk_opt.value();
|
||||
|
||||
// Transcribe with Whisper
|
||||
auto whisper_result = whisper_client_->transcribe(
|
||||
chunk.data,
|
||||
chunk.sample_rate,
|
||||
chunk.channels,
|
||||
config.getWhisperConfig().language,
|
||||
config.getWhisperConfig().temperature
|
||||
);
|
||||
|
||||
if (!whisper_result.has_value()) {
|
||||
std::cerr << "Whisper transcription failed" << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Translate with Claude
|
||||
auto claude_result = claude_client_->translate(
|
||||
whisper_result->text,
|
||||
config.getClaudeConfig().system_prompt,
|
||||
config.getClaudeConfig().max_tokens,
|
||||
config.getClaudeConfig().temperature
|
||||
);
|
||||
|
||||
if (!claude_result.has_value()) {
|
||||
std::cerr << "Claude translation failed" << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Add to UI
|
||||
ui_->addTranslation(whisper_result->text, claude_result->text);
|
||||
|
||||
std::cout << "CN: " << whisper_result->text << std::endl;
|
||||
std::cout << "FR: " << claude_result->text << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Pipeline::uiThread() {
|
||||
while (running_ && !ui_->shouldClose()) {
|
||||
ui_->setRecordingDuration(recording_duration_);
|
||||
ui_->setProcessingStatus("Processing...");
|
||||
ui_->render();
|
||||
|
||||
// Check if stop was requested
|
||||
if (ui_->isStopRequested()) {
|
||||
running_ = false;
|
||||
break;
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(16)); // ~60 FPS
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace secondvoice
|
||||
src/core/Pipeline.h (new file, 59 lines)
@@ -0,0 +1,59 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include "../utils/ThreadSafeQueue.h"
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
class AudioCapture;
|
||||
class WhisperClient;
|
||||
class ClaudeClient;
|
||||
class TranslationUI;
|
||||
class AudioBuffer;
|
||||
|
||||
struct AudioChunk {
|
||||
std::vector<float> data;
|
||||
int sample_rate;
|
||||
int channels;
|
||||
};
|
||||
|
||||
struct TranscriptionResult {
|
||||
std::string chinese_text;
|
||||
};
|
||||
|
||||
class Pipeline {
|
||||
public:
|
||||
Pipeline();
|
||||
~Pipeline();
|
||||
|
||||
bool initialize();
|
||||
bool start();
|
||||
void stop();
|
||||
|
||||
bool isRunning() const { return running_; }
|
||||
|
||||
private:
|
||||
void audioThread();
|
||||
void processingThread();
|
||||
void uiThread();
|
||||
|
||||
std::unique_ptr<AudioCapture> audio_capture_;
|
||||
std::unique_ptr<WhisperClient> whisper_client_;
|
||||
std::unique_ptr<ClaudeClient> claude_client_;
|
||||
std::unique_ptr<TranslationUI> ui_;
|
||||
std::unique_ptr<AudioBuffer> full_recording_;
|
||||
|
||||
ThreadSafeQueue<AudioChunk> audio_queue_;
|
||||
ThreadSafeQueue<TranscriptionResult> transcription_queue_;
|
||||
|
||||
std::thread audio_thread_;
|
||||
std::thread processing_thread_;
|
||||
std::thread ui_thread_;
|
||||
|
||||
std::atomic<bool> running_{false};
|
||||
std::atomic<int> recording_duration_{0};
|
||||
};
|
||||
|
||||
} // namespace secondvoice
|
||||
src/main.cpp (new file, 56 lines)
@@ -0,0 +1,56 @@
|
||||
#include <chrono>
#include <iostream>
#include <thread>
#include "utils/Config.h"
#include "core/Pipeline.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
std::cout << "SecondVoice - Real-time Translation System" << std::endl;
|
||||
std::cout << "===========================================" << std::endl;
|
||||
|
||||
// Load configuration
|
||||
secondvoice::Config& config = secondvoice::Config::getInstance();
|
||||
if (!config.load("config.json", ".env")) {
|
||||
std::cerr << "Failed to load configuration" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cout << "Configuration loaded successfully" << std::endl;
|
||||
std::cout << "Audio: " << config.getAudioConfig().sample_rate << "Hz, "
|
||||
<< config.getAudioConfig().channels << " channel(s), "
|
||||
<< config.getAudioConfig().chunk_duration_seconds << "s chunks" << std::endl;
|
||||
std::cout << "Whisper: " << config.getWhisperConfig().model
|
||||
<< " (language: " << config.getWhisperConfig().language << ")" << std::endl;
|
||||
std::cout << "Claude: " << config.getClaudeConfig().model << std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
// Create and initialize pipeline
|
||||
secondvoice::Pipeline pipeline;
|
||||
if (!pipeline.initialize()) {
|
||||
std::cerr << "Failed to initialize pipeline" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cout << "Pipeline initialized successfully" << std::endl;
|
||||
std::cout << "Starting recording and translation..." << std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
// Start pipeline
|
||||
if (!pipeline.start()) {
|
||||
std::cerr << "Failed to start pipeline" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Wait for pipeline to finish (user clicks Stop button)
|
||||
while (pipeline.isRunning()) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Recording stopped" << std::endl;
|
||||
std::cout << "Saving audio..." << std::endl;
|
||||
|
||||
pipeline.stop();
|
||||
|
||||
std::cout << "Done!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
src/ui/TranslationUI.cpp (new file, 160 lines)
@@ -0,0 +1,160 @@
#include "TranslationUI.h"
#include <imgui.h>
#include <imgui_impl_glfw.h>
#include <imgui_impl_opengl3.h>
#include <iostream>

namespace secondvoice {

TranslationUI::TranslationUI(int width, int height)
    : width_(width)
    , height_(height) {
}

TranslationUI::~TranslationUI() {
    if (window_) {
        ImGui_ImplOpenGL3_Shutdown();
        ImGui_ImplGlfw_Shutdown();
        ImGui::DestroyContext();
        glfwDestroyWindow(window_);
        glfwTerminate();
    }
}

bool TranslationUI::initialize() {
    // Initialize GLFW
    if (!glfwInit()) {
        std::cerr << "Failed to initialize GLFW" << std::endl;
        return false;
    }

    // OpenGL 3.3 + GLSL 330
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);

    // Create window
    window_ = glfwCreateWindow(width_, height_, "SecondVoice - Live Translation", nullptr, nullptr);
    if (!window_) {
        std::cerr << "Failed to create GLFW window" << std::endl;
        glfwTerminate();
        return false;
    }

    glfwMakeContextCurrent(window_);
    glfwSwapInterval(1); // Enable vsync

    // Initialize ImGui
    IMGUI_CHECKVERSION();
    ImGui::CreateContext();
    ImGuiIO& io = ImGui::GetIO();
    io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard;

    ImGui::StyleColorsDark();

    ImGui_ImplGlfw_InitForOpenGL(window_, true);
    ImGui_ImplOpenGL3_Init("#version 330");

    return true;
}

void TranslationUI::render() {
    glfwPollEvents();

    // Start ImGui frame
    ImGui_ImplOpenGL3_NewFrame();
    ImGui_ImplGlfw_NewFrame();
    ImGui::NewFrame();

    // Main window (full viewport)
    ImGui::SetNextWindowPos(ImVec2(0, 0));
    ImGui::SetNextWindowSize(ImGui::GetIO().DisplaySize);
    ImGui::Begin("SecondVoice", nullptr,
                 ImGuiWindowFlags_NoTitleBar |
                 ImGuiWindowFlags_NoResize |
                 ImGuiWindowFlags_NoMove |
                 ImGuiWindowFlags_NoCollapse);

    renderTranslations();
    renderControls();
    renderStatus();

    ImGui::End();

    // Rendering
    ImGui::Render();
    int display_w, display_h;
    glfwGetFramebufferSize(window_, &display_w, &display_h);
    glViewport(0, 0, display_w, display_h);
    glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT);
    ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());

    glfwSwapBuffers(window_);
}

bool TranslationUI::shouldClose() const {
    return glfwWindowShouldClose(window_);
}

void TranslationUI::addTranslation(const std::string& chinese, const std::string& french) {
    messages_.push_back({chinese, french});
}

void TranslationUI::renderTranslations() {
    ImGui::Text("SecondVoice - Live Translation");
    ImGui::Separator();

    ImGui::BeginChild("Translations", ImVec2(0, -120), true);

    for (const auto& msg : messages_) {
        ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.5f, 0.8f, 1.0f, 1.0f));
        ImGui::TextWrapped("中文: %s", msg.chinese.c_str());
        ImGui::PopStyleColor();

        ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.5f, 1.0f, 0.5f, 1.0f));
        ImGui::TextWrapped("FR: %s", msg.french.c_str());
        ImGui::PopStyleColor();

        ImGui::Spacing();
        ImGui::Separator();
        ImGui::Spacing();
    }

    if (auto_scroll_ && ImGui::GetScrollY() >= ImGui::GetScrollMaxY()) {
        ImGui::SetScrollHereY(1.0f);
    }

    ImGui::EndChild();
}

void TranslationUI::renderControls() {
    ImGui::Spacing();

    // Center the stop button
    float button_width = 200.0f;
    float window_width = ImGui::GetWindowWidth();
    ImGui::SetCursorPosX((window_width - button_width) * 0.5f);

    if (ImGui::Button("STOP RECORDING", ImVec2(button_width, 40))) {
        stop_requested_ = true;
    }

    ImGui::Spacing();
}

void TranslationUI::renderStatus() {
    ImGui::Separator();

    // Format duration as MM:SS
    int minutes = recording_duration_ / 60;
    int seconds = recording_duration_ % 60;
    ImGui::Text("Recording... Duration: %02d:%02d", minutes, seconds);

    if (!processing_status_.empty()) {
        ImGui::SameLine();
        ImGui::Text(" | Status: %s", processing_status_.c_str());
    }
}

} // namespace secondvoice

47
src/ui/TranslationUI.h
Normal file
@ -0,0 +1,47 @@
#pragma once

#include <string>
#include <vector>
#include <GLFW/glfw3.h>

namespace secondvoice {

struct TranslationMessage {
    std::string chinese;
    std::string french;
};

class TranslationUI {
public:
    TranslationUI(int width, int height);
    ~TranslationUI();

    bool initialize();
    void render();
    bool shouldClose() const;

    void addTranslation(const std::string& chinese, const std::string& french);
    bool isStopRequested() const { return stop_requested_; }
    void resetStopRequest() { stop_requested_ = false; }

    void setRecordingDuration(int seconds) { recording_duration_ = seconds; }
    void setProcessingStatus(const std::string& status) { processing_status_ = status; }

private:
    void renderTranslations();
    void renderControls();
    void renderStatus();

    int width_;
    int height_;
    GLFWwindow* window_ = nullptr;

    std::vector<TranslationMessage> messages_;
    bool stop_requested_ = false;
    bool auto_scroll_ = true;

    int recording_duration_ = 0;
    std::string processing_status_;
};

} // namespace secondvoice
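
The header above is the full public surface of the UI thread. Below is a minimal sketch of how a caller might drive it each frame; the real wiring lives in core/Pipeline and src/main.cpp (not shown in this diff), so the queue type, window size, and timing code here are illustrative assumptions rather than the committed implementation.

// Illustrative driver loop only -- not the committed Pipeline code.
// Include paths and the 800x600 window size are assumptions; real values
// come from UIConfig and the main() wiring.
#include <chrono>
#include "ui/TranslationUI.h"
#include "utils/ThreadSafeQueue.h"

int runUiLoop(secondvoice::ThreadSafeQueue<secondvoice::TranslationMessage>& results) {
    secondvoice::TranslationUI ui(800, 600);
    if (!ui.initialize()) {
        return 1;
    }

    const auto start = std::chrono::steady_clock::now();
    while (!ui.shouldClose() && !ui.isStopRequested()) {
        // Drain finished translations produced by the processing thread.
        while (auto msg = results.pop()) {
            ui.addTranslation(msg->chinese, msg->french);
        }

        const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
            std::chrono::steady_clock::now() - start);
        ui.setRecordingDuration(static_cast<int>(elapsed.count()));

        ui.render();  // polls events, draws one ImGui frame, swaps buffers
    }
    return 0;
}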

106
src/utils/Config.cpp
Normal file
@ -0,0 +1,106 @@
#include "Config.h"
#include <nlohmann/json.hpp>
#include <fstream>
#include <iostream>
#include <cstdlib>

using json = nlohmann::json;

namespace secondvoice {

Config& Config::getInstance() {
    static Config instance;
    return instance;
}

bool Config::load(const std::string& config_path, const std::string& env_path) {
    // Load .env file
    std::ifstream env_file(env_path);
    if (env_file.is_open()) {
        std::string line;
        while (std::getline(env_file, line)) {
            if (line.empty() || line[0] == '#') continue;

            auto pos = line.find('=');
            if (pos != std::string::npos) {
                std::string key = line.substr(0, pos);
                std::string value = line.substr(pos + 1);

                // Remove quotes if present
                if (!value.empty() && value.front() == '"' && value.back() == '"') {
                    value = value.substr(1, value.length() - 2);
                }

                if (key == "OPENAI_API_KEY") {
                    openai_key_ = value;
                } else if (key == "ANTHROPIC_API_KEY") {
                    anthropic_key_ = value;
                }
            }
        }
        env_file.close();
    } else {
        std::cerr << "Warning: Could not open .env file: " << env_path << std::endl;
    }

    // Load config.json
    std::ifstream config_file(config_path);
    if (!config_file.is_open()) {
        std::cerr << "Error: Could not open config file: " << config_path << std::endl;
        return false;
    }

    json config_json;
    try {
        config_file >> config_json;
    } catch (const json::parse_error& e) {
        std::cerr << "Error parsing config.json: " << e.what() << std::endl;
        return false;
    }

    // Parse audio config
    if (config_json.contains("audio")) {
        auto& audio = config_json["audio"];
        audio_config_.sample_rate = audio.value("sample_rate", 16000);
        audio_config_.channels = audio.value("channels", 1);
        audio_config_.chunk_duration_seconds = audio.value("chunk_duration_seconds", 10);
        audio_config_.format = audio.value("format", "wav");
    }

    // Parse whisper config
    if (config_json.contains("whisper")) {
        auto& whisper = config_json["whisper"];
        whisper_config_.model = whisper.value("model", "whisper-1");
        whisper_config_.language = whisper.value("language", "zh");
        whisper_config_.temperature = whisper.value("temperature", 0.0f);
    }

    // Parse claude config
    if (config_json.contains("claude")) {
        auto& claude = config_json["claude"];
        claude_config_.model = claude.value("model", "claude-haiku-4-20250514");
        claude_config_.max_tokens = claude.value("max_tokens", 1024);
        claude_config_.temperature = claude.value("temperature", 0.3f);
        claude_config_.system_prompt = claude.value("system_prompt", "");
    }

    // Parse UI config
    if (config_json.contains("ui")) {
        auto& ui = config_json["ui"];
        ui_config_.window_width = ui.value("window_width", 800);
        ui_config_.window_height = ui.value("window_height", 600);
        ui_config_.font_size = ui.value("font_size", 16);
        ui_config_.max_display_lines = ui.value("max_display_lines", 50);
    }

    // Parse recording config
    if (config_json.contains("recording")) {
        auto& recording = config_json["recording"];
        recording_config_.save_audio = recording.value("save_audio", true);
        recording_config_.output_directory = recording.value("output_directory", "./recordings");
    }

    return true;
}

} // namespace secondvoice

69
src/utils/Config.h
Normal file
@ -0,0 +1,69 @@
#pragma once

#include <string>

namespace secondvoice {

struct AudioConfig {
    int sample_rate;
    int channels;
    int chunk_duration_seconds;
    std::string format;
};

struct WhisperConfig {
    std::string model;
    std::string language;
    float temperature;
};

struct ClaudeConfig {
    std::string model;
    int max_tokens;
    float temperature;
    std::string system_prompt;
};

struct UIConfig {
    int window_width;
    int window_height;
    int font_size;
    int max_display_lines;
};

struct RecordingConfig {
    bool save_audio;
    std::string output_directory;
};

class Config {
public:
    static Config& getInstance();

    bool load(const std::string& config_path, const std::string& env_path);

    const AudioConfig& getAudioConfig() const { return audio_config_; }
    const WhisperConfig& getWhisperConfig() const { return whisper_config_; }
    const ClaudeConfig& getClaudeConfig() const { return claude_config_; }
    const UIConfig& getUIConfig() const { return ui_config_; }
    const RecordingConfig& getRecordingConfig() const { return recording_config_; }

    const std::string& getOpenAIKey() const { return openai_key_; }
    const std::string& getAnthropicKey() const { return anthropic_key_; }

private:
    Config() = default;
    Config(const Config&) = delete;
    Config& operator=(const Config&) = delete;

    AudioConfig audio_config_;
    WhisperConfig whisper_config_;
    ClaudeConfig claude_config_;
    UIConfig ui_config_;
    RecordingConfig recording_config_;

    std::string openai_key_;
    std::string anthropic_key_;
};

} // namespace secondvoice
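
Config is a process-wide singleton, and the getters above are read-only views over whatever Config::load() parsed. The sketch below shows one way a caller might consume it, assuming the conventional file names at the repo root (config.json and .env); the section and key names in the comments mirror the parsing code in Config.cpp, while the surrounding function is purely illustrative.

// Usage sketch only. The file paths are the conventional ones from the repo
// root; the key names in the comments match what Config::load() reads above.
#include <iostream>
#include "utils/Config.h"

bool setupFromConfig() {
    auto& cfg = secondvoice::Config::getInstance();

    // config.json sections parsed above: audio, whisper, claude, ui, recording.
    if (!cfg.load("config.json", ".env")) {
        return false;
    }

    const auto& audio = cfg.getAudioConfig();  // sample_rate, channels, chunk_duration_seconds, format
    std::cout << "Capturing at " << audio.sample_rate << " Hz, "
              << audio.chunk_duration_seconds << " s chunks" << std::endl;

    // Keys come from OPENAI_API_KEY / ANTHROPIC_API_KEY in .env.
    if (cfg.getOpenAIKey().empty() || cfg.getAnthropicKey().empty()) {
        std::cerr << "Missing API keys in .env" << std::endl;
        return false;
    }
    return true;
}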

65
src/utils/ThreadSafeQueue.h
Normal file
@ -0,0 +1,65 @@
#pragma once

#include <queue>
#include <mutex>
#include <condition_variable>
#include <optional>

namespace secondvoice {

template<typename T>
class ThreadSafeQueue {
public:
    void push(T value) {
        std::lock_guard<std::mutex> lock(mutex_);
        queue_.push(std::move(value));
        cv_.notify_one();
    }

    std::optional<T> pop() {
        std::unique_lock<std::mutex> lock(mutex_);
        if (queue_.empty()) {
            return std::nullopt;
        }
        T value = std::move(queue_.front());
        queue_.pop();
        return value;
    }

    std::optional<T> wait_and_pop() {
        std::unique_lock<std::mutex> lock(mutex_);
        cv_.wait(lock, [this] { return !queue_.empty() || shutdown_; });

        if (shutdown_ && queue_.empty()) {
            return std::nullopt;
        }

        T value = std::move(queue_.front());
        queue_.pop();
        return value;
    }

    bool empty() const {
        std::lock_guard<std::mutex> lock(mutex_);
        return queue_.empty();
    }

    size_t size() const {
        std::lock_guard<std::mutex> lock(mutex_);
        return queue_.size();
    }

    void shutdown() {
        std::lock_guard<std::mutex> lock(mutex_);
        shutdown_ = true;
        cv_.notify_all();
    }

private:
    mutable std::mutex mutex_;
    std::condition_variable cv_;
    std::queue<T> queue_;
    bool shutdown_ = false;
};

} // namespace secondvoice
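
ThreadSafeQueue is what decouples the three pipeline threads. The sketch below shows the intended producer/consumer shape (capture thread pushes, processing thread blocks on wait_and_pop(), shutdown() releases the waiter so it can drain and exit); the AudioChunk type and the thread bodies are stand-ins, since the real orchestration lives in core/Pipeline.

// Producer/consumer sketch; AudioChunk and the thread bodies are stand-ins,
// the real wiring lives in core/Pipeline.
#include <thread>
#include <vector>
#include "utils/ThreadSafeQueue.h"

struct AudioChunk {
    std::vector<float> samples;
};

void demoPipeline() {
    secondvoice::ThreadSafeQueue<AudioChunk> chunks;

    // Capture thread: pushes fixed-duration chunks, then signals shutdown.
    std::thread capture([&chunks] {
        for (int i = 0; i < 3; ++i) {
            chunks.push(AudioChunk{std::vector<float>(16000, 0.0f)});
        }
        chunks.shutdown();  // wakes the consumer so it can drain and exit
    });

    // Processing thread: blocks until a chunk (or shutdown) arrives.
    std::thread process([&chunks] {
        while (auto chunk = chunks.wait_and_pop()) {
            // ...send chunk->samples to Whisper, then the transcript to Claude
        }
    });

    capture.join();
    process.join();
}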

15
vcpkg.json
Normal file
@ -0,0 +1,15 @@
{
  "name": "secondvoice",
  "version": "0.1.0",
  "dependencies": [
    "portaudio",
    "cpp-httplib",
    "nlohmann-json",
    {
      "name": "imgui",
      "features": ["glfw-binding", "opengl3-binding"]
    },
    "glfw3",
    "opengl"
  ]
}