From a28bb89913faea24b7c24b63a84b04bb05179d62 Mon Sep 17 00:00:00 2001
From: Trouve Alexis <Alexistrouve.pro@gmail.com>
Date: Sun, 23 Nov 2025 22:08:01 +0800
Subject: [PATCH] tune: Adjust VAD parameters for longer segments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- min_speech_duration_ms_: 300 → 1000 (avoid tiny segments)
- silence_duration_ms_: 400 → 700 (wait longer before cutting)
- hang_frames_threshold_: 20 → 35 (~350ms pause tolerance)

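This should reduce mid-sentence cuts and give Whisper more context.
Trade-off: a segment is now cut roughly 300ms later than before, and
speech shorter than 1s is no longer sent.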

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/audio/AudioCapture.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/audio/AudioCapture.h b/src/audio/AudioCapture.h
index af39627..5a004e9 100644
--- a/src/audio/AudioCapture.h
+++ b/src/audio/AudioCapture.h
@@ -69,8 +69,8 @@ private:
     // VAD parameters - Higher threshold to avoid false triggers on filtered noise
     std::atomic<float> vad_rms_threshold_{0.02f};   // Was 0.01f
     std::atomic<float> vad_peak_threshold_{0.08f};  // Was 0.04f
-    int silence_duration_ms_ = 400;      // Wait 400ms of silence before cutting
-    int min_speech_duration_ms_ = 300;   // Minimum speech to send
+    int silence_duration_ms_ = 700;      // Wait 700ms of silence before cutting (was 400)
+    int min_speech_duration_ms_ = 1000;  // Minimum 1s speech to send (was 300)
     int max_speech_duration_ms_ = 25000; // 25s max before forced flush
 
     // Adaptive noise floor
@@ -79,7 +79,7 @@ private:
 
     // Hang time - wait before cutting to avoid mid-sentence cuts
     int hang_frames_ = 0;
-    int hang_frames_threshold_ = 20;     // ~200ms tolerance for pauses
+    int hang_frames_threshold_ = 35;     // ~350ms tolerance for pauses (was 20)
 
     // Zero-crossing rate for speech vs noise discrimination
     float last_zcr_ = 0.0f;