tune: Extend VAD speech duration and improve context prompt formatting
This commit is contained in:
parent
db0f8e5990
commit
49f9cb906e
@ -6,6 +6,11 @@
|
|||||||
"chunk_step_seconds": 5,
|
"chunk_step_seconds": 5,
|
||||||
"format": "ogg"
|
"format": "ogg"
|
||||||
},
|
},
|
||||||
|
"vad": {
|
||||||
|
"silence_duration_ms": 700,
|
||||||
|
"min_speech_duration_ms": 2000,
|
||||||
|
"max_speech_duration_ms": 30000
|
||||||
|
},
|
||||||
"whisper": {
|
"whisper": {
|
||||||
"model": "gpt-4o-mini-transcribe",
|
"model": "gpt-4o-mini-transcribe",
|
||||||
"language": "zh",
|
"language": "zh",
|
||||||
|
|||||||
@ -70,8 +70,8 @@ private:
|
|||||||
std::atomic<float> vad_rms_threshold_{0.02f}; // Was 0.01f
|
std::atomic<float> vad_rms_threshold_{0.02f}; // Was 0.01f
|
||||||
std::atomic<float> vad_peak_threshold_{0.08f}; // Was 0.04f
|
std::atomic<float> vad_peak_threshold_{0.08f}; // Was 0.04f
|
||||||
int silence_duration_ms_ = 700; // Wait 700ms of silence before cutting (was 400)
|
int silence_duration_ms_ = 700; // Wait 700ms of silence before cutting (was 400)
|
||||||
int min_speech_duration_ms_ = 1000; // Minimum 1s speech to send (was 300)
|
int min_speech_duration_ms_ = 2000; // Minimum 2s speech to send (was 1000)
|
||||||
int max_speech_duration_ms_ = 25000; // 25s max before forced flush
|
int max_speech_duration_ms_ = 30000; // 30s max before forced flush (was 25000)
|
||||||
|
|
||||||
// Adaptive noise floor
|
// Adaptive noise floor
|
||||||
float noise_floor_ = 0.005f; // Estimated background noise level
|
float noise_floor_ = 0.005f; // Estimated background noise level
|
||||||
|
|||||||
@ -468,11 +468,11 @@ std::string Pipeline::buildDynamicPrompt() const {
|
|||||||
// Build context from recent transcriptions
|
// Build context from recent transcriptions
|
||||||
std::stringstream context;
|
std::stringstream context;
|
||||||
context << base_prompt;
|
context << base_prompt;
|
||||||
context << "\n\nContexte des phrases précédentes: ";
|
context << "\n\nContexte des phrases précédentes:\n";
|
||||||
|
|
||||||
for (size_t i = 0; i < recent_transcriptions_.size(); ++i) {
|
for (size_t i = 0; i < recent_transcriptions_.size(); ++i) {
|
||||||
if (i > 0) context << " ";
|
context << std::to_string(i + 1) << ". "
|
||||||
context << recent_transcriptions_[i];
|
<< recent_transcriptions_[i] << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
return context.str();
|
return context.str();
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user