From a28bb89913faea24b7c24b63a84b04bb05179d62 Mon Sep 17 00:00:00 2001
From: Trouve Alexis
Date: Sun, 23 Nov 2025 22:08:01 +0800
Subject: [PATCH] tune: Adjust VAD parameters for longer segments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- min_speech_duration: 300ms → 1000ms (avoid tiny segments)
- silence_duration: 400ms → 700ms (wait longer before cutting)
- hang_frames_threshold: 20 → 35 (~350ms pause tolerance)

This should reduce mid-sentence cuts and give Whisper more context.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/audio/AudioCapture.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/audio/AudioCapture.h b/src/audio/AudioCapture.h
index af39627..5a004e9 100644
--- a/src/audio/AudioCapture.h
+++ b/src/audio/AudioCapture.h
@@ -69,8 +69,8 @@ private:
     // VAD parameters - Higher threshold to avoid false triggers on filtered noise
     std::atomic<float> vad_rms_threshold_{0.02f}; // Was 0.01f
     std::atomic<float> vad_peak_threshold_{0.08f}; // Was 0.04f
-    int silence_duration_ms_ = 400; // Wait 400ms of silence before cutting
-    int min_speech_duration_ms_ = 300; // Minimum speech to send
+    int silence_duration_ms_ = 700; // Wait 700ms of silence before cutting (was 400)
+    int min_speech_duration_ms_ = 1000; // Minimum 1s speech to send (was 300)
     int max_speech_duration_ms_ = 25000; // 25s max before forced flush
 
     // Adaptive noise floor
@@ -79,7 +79,7 @@ private:
 
     // Hang time - wait before cutting to avoid mid-sentence cuts
     int hang_frames_ = 0;
-    int hang_frames_threshold_ = 20; // ~200ms tolerance for pauses
+    int hang_frames_threshold_ = 35; // ~350ms tolerance for pauses (was 20)
 
     // Zero-crossing rate for speech vs noise discrimination
     float last_zcr_ = 0.0f;