diff --git a/CMakeLists.txt b/CMakeLists.txt index 35d045f..071193d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.20) -project(SecondVoice VERSION 0.1.0 LANGUAGES CXX) +project(SecondVoice VERSION 0.1.0 LANGUAGES C CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -38,6 +38,35 @@ FetchContent_Declare( FetchContent_MakeAvailable(ogg opus) +# Fetch RNNoise for noise reduction (using older stable version) +FetchContent_Declare( + rnnoise + GIT_REPOSITORY https://github.com/xiph/rnnoise.git + GIT_TAG v0.1.1 +) + +FetchContent_MakeAvailable(rnnoise) + +# Build RNNoise as a static library (v0.1.1 structure) +add_library(rnnoise STATIC + ${rnnoise_SOURCE_DIR}/src/denoise.c + ${rnnoise_SOURCE_DIR}/src/rnn.c + ${rnnoise_SOURCE_DIR}/src/rnn_data.c + ${rnnoise_SOURCE_DIR}/src/pitch.c + ${rnnoise_SOURCE_DIR}/src/kiss_fft.c + ${rnnoise_SOURCE_DIR}/src/celt_lpc.c +) + +target_include_directories(rnnoise PUBLIC + ${rnnoise_SOURCE_DIR}/include + ${rnnoise_SOURCE_DIR}/src +) + +# RNNoise needs math library +if(UNIX) + target_link_libraries(rnnoise PRIVATE m) +endif() + # Create ImGui library with OpenGL/GLFW backends add_library(imgui_backends ${imgui_SOURCE_DIR}/imgui.cpp @@ -70,6 +99,7 @@ set(SOURCES_UI # Audio module src/audio/AudioCapture.cpp src/audio/AudioBuffer.cpp + src/audio/NoiseReducer.cpp # API clients src/api/WhisperClient.cpp src/api/ClaudeClient.cpp @@ -88,6 +118,7 @@ set(SOURCES_CONSOLE # Audio module src/audio/AudioCapture.cpp src/audio/AudioBuffer.cpp + src/audio/NoiseReducer.cpp # API clients src/api/WhisperClient.cpp src/api/ClaudeClient.cpp @@ -107,9 +138,11 @@ add_executable(${PROJECT_NAME}_Console ${SOURCES_CONSOLE}) # Include directories target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src + ${rnnoise_SOURCE_DIR}/include ) target_include_directories(${PROJECT_NAME}_Console PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src + ${rnnoise_SOURCE_DIR}/include ) # Link 
libraries @@ -124,6 +157,7 @@ if(MINGW AND NOT BUILD_SHARED_LIBS) OpenGL::GL opus ogg + rnnoise # Windows system libraries winmm setupapi @@ -136,6 +170,7 @@ if(MINGW AND NOT BUILD_SHARED_LIBS) nlohmann_json::nlohmann_json opus ogg + rnnoise # Windows system libraries winmm setupapi @@ -152,6 +187,7 @@ else() OpenGL::GL opus ogg + rnnoise ) # Console version - NO UI libs target_link_libraries(${PROJECT_NAME}_Console PRIVATE @@ -159,6 +195,7 @@ else() nlohmann_json::nlohmann_json opus ogg + rnnoise ) endif() diff --git a/config.json b/config.json index 54be627..c0edd1a 100644 --- a/config.json +++ b/config.json @@ -18,10 +18,10 @@ "model": "claude-3-5-haiku-20241022", "max_tokens": 1024, "temperature": 0.3, - "system_prompt": "Tu es un traducteur chinois-français. Réponds UNIQUEMENT avec la traduction française, sans explications, notes, commentaires ou alternatives. Une seule phrase traduite, rien d'autre." + "system_prompt": "Tu es un traducteur chinois-français. Réponds UNIQUEMENT avec la traduction française, sans explications, notes, commentaires ou alternatives. Une seule phrase traduite, rien d'autre. Si du contenu offensant est présent, remplace-le par [contenu offensant] mais traduis impérativement le reste de la phrase." }, "ui": { - "window_width": 1200, + "window_width": 1500, "window_height": 800, "font_size": 24, "max_display_lines": 50 diff --git a/src/audio/AudioCapture.cpp b/src/audio/AudioCapture.cpp index 4542083..b384df7 100644 --- a/src/audio/AudioCapture.cpp +++ b/src/audio/AudioCapture.cpp @@ -1,14 +1,14 @@ #include "AudioCapture.h" +#include "NoiseReducer.h" #include namespace secondvoice { -AudioCapture::AudioCapture(int sample_rate, int channels, int chunk_duration_seconds, int chunk_step_seconds) +AudioCapture::AudioCapture(int sample_rate, int channels) : sample_rate_(sample_rate) , channels_(channels) - , chunk_duration_seconds_(chunk_duration_seconds) - , chunk_step_seconds_(chunk_step_seconds > 0 ? 
chunk_step_seconds : chunk_duration_seconds) { - // If step not specified, use duration (no overlap) + , noise_reducer_(std::make_unique()) { + std::cout << "[Audio] Noise reduction enabled (RNNoise)" << std::endl; } AudioCapture::~AudioCapture() { @@ -35,31 +35,212 @@ int AudioCapture::audioCallback(const void* input, void* output, const PaStreamCallbackTimeInfo* time_info, PaStreamCallbackFlags status_flags, void* user_data) { - (void)output; // Unused - (void)time_info; // Unused - (void)status_flags; // Unused + (void)output; + (void)time_info; + (void)status_flags; AudioCapture* self = static_cast(user_data); const float* in = static_cast(input); + unsigned long sample_count = frame_count * self->channels_; - // Accumulate audio data - for (unsigned long i = 0; i < frame_count * self->channels_; ++i) { - self->buffer_.push_back(in[i]); + // === REAL-TIME DENOISING === + // Process audio through RNNoise in real-time for meter display + std::vector denoised_samples; + if (self->noise_reducer_ && self->noise_reducer_->isEnabled()) { + denoised_samples = self->noise_reducer_->processRealtime(in, sample_count); } - // Check if we have accumulated enough data for a chunk - size_t chunk_samples = self->sample_rate_ * self->channels_ * self->chunk_duration_seconds_; - size_t step_samples = self->sample_rate_ * self->channels_ * self->chunk_step_seconds_; - - if (self->buffer_.size() >= chunk_samples) { - // Call the callback with the full chunk - if (self->callback_) { - // Send exactly chunk_samples (the window) - std::vector chunk(self->buffer_.begin(), self->buffer_.begin() + chunk_samples); - self->callback_(chunk); + // Calculate RMS and Peak from RAW audio (for VAD detection) + float raw_sum_squared = 0.0f; + float raw_max_amp = 0.0f; + for (unsigned long i = 0; i < sample_count; ++i) { + float sample = in[i]; + raw_sum_squared += sample * sample; + if (std::abs(sample) > raw_max_amp) { + raw_max_amp = std::abs(sample); + } + } + float instant_rms = 
std::sqrt(raw_sum_squared / sample_count); + float instant_peak = raw_max_amp; + + // Calculate RMS and Peak from DENOISED audio (for UI meter) + float denoised_rms = 0.0f; + float denoised_peak = 0.0f; + if (!denoised_samples.empty()) { + float sum_squared = 0.0f; + float max_amp = 0.0f; + for (const float& sample : denoised_samples) { + sum_squared += sample * sample; + if (std::abs(sample) > max_amp) { + max_amp = std::abs(sample); + } + } + denoised_rms = std::sqrt(sum_squared / denoised_samples.size()); + denoised_peak = max_amp; + } else { + // Fallback to raw if no denoised output yet + denoised_rms = instant_rms; + denoised_peak = instant_peak; + } + + // Smooth RMS/Peak for display (fast attack, slow decay) - USE DENOISED VALUES + constexpr float alpha_rise = 0.1f; + constexpr float alpha_fall = 0.02f; + + float old_rms = self->current_rms_.load(std::memory_order_relaxed); + float old_peak = self->current_peak_.load(std::memory_order_relaxed); + + float alpha_rms = (denoised_rms > old_rms) ? alpha_rise : alpha_fall; + float alpha_peak = (denoised_peak > old_peak) ? alpha_rise : alpha_fall; + + self->current_rms_.store(alpha_rms * denoised_rms + (1.0f - alpha_rms) * old_rms, std::memory_order_relaxed); + self->current_peak_.store(alpha_peak * denoised_peak + (1.0f - alpha_peak) * old_peak, std::memory_order_relaxed); + + // === VAD NOW WORKS ON FILTERED AUDIO === + // This way, filtered noise/claps won't trigger VAD! 
+ + // Calculate Zero-Crossing Rate on DENOISED audio + float zcr = 0.0f; + if (!denoised_samples.empty() && denoised_samples.size() > 1) { + int zero_crossings = 0; + for (size_t i = 1; i < denoised_samples.size(); ++i) { + if ((denoised_samples[i] >= 0 && denoised_samples[i-1] < 0) || + (denoised_samples[i] < 0 && denoised_samples[i-1] >= 0)) { + zero_crossings++; + } + } + zcr = static_cast(zero_crossings) / denoised_samples.size(); + } + self->last_zcr_ = zcr; + + // Get user-adjustable thresholds + float rms_thresh = self->vad_rms_threshold_.load(std::memory_order_relaxed); + float peak_thresh = self->vad_peak_threshold_.load(std::memory_order_relaxed); + + // Adaptive noise floor: track minimum DENOISED energy level + if (denoised_rms < self->noise_floor_ * 2.0f) { + self->noise_floor_ = self->noise_floor_ * (1.0f - self->noise_floor_alpha_) + + denoised_rms * self->noise_floor_alpha_; + } + + // Dynamic threshold: noise floor + user threshold as margin + float adaptive_rms_thresh = self->noise_floor_ + rms_thresh; + + // Speech detection on DENOISED audio: + // 1. Energy above adaptive threshold + // 2. 
ZCR in speech range (not too high = noise, not too low = silence) + bool energy_ok = (denoised_rms >= adaptive_rms_thresh) || (denoised_peak >= peak_thresh); + bool zcr_ok = (zcr >= self->speech_zcr_min_) && (zcr <= self->speech_zcr_max_); + + // Speech = energy OK AND (ZCR OK or very high energy) + bool frame_has_speech = energy_ok && (zcr_ok || denoised_rms > adaptive_rms_thresh * 3.0f); + + // Hang time logic: don't immediately cut on silence + if (frame_has_speech) { + self->hang_frames_ = self->hang_frames_threshold_; // Reset hang counter + } else if (self->hang_frames_ > 0) { + self->hang_frames_--; + frame_has_speech = true; // Keep "speaking" during hang time + } + + // Calculate durations in samples + int silence_samples_threshold = (self->silence_duration_ms_ * self->sample_rate_ * self->channels_) / 1000; + int min_speech_samples = (self->min_speech_duration_ms_ * self->sample_rate_ * self->channels_) / 1000; + int max_speech_samples = (self->max_speech_duration_ms_ * self->sample_rate_ * self->channels_) / 1000; + + if (frame_has_speech) { + // Speech detected + self->is_speech_active_.store(true, std::memory_order_relaxed); + self->silence_samples_count_ = 0; + + // Add DENOISED audio to buffer (already processed by processRealtime above) + if (!denoised_samples.empty()) { + self->speech_buffer_.insert(self->speech_buffer_.end(), + denoised_samples.begin(), denoised_samples.end()); + } else { + // Fallback to raw if denoising disabled + for (unsigned long i = 0; i < sample_count; ++i) { + self->speech_buffer_.push_back(in[i]); + } + } + self->speech_samples_count_ += sample_count; + + // Force flush if too long + if (self->speech_samples_count_ >= max_speech_samples) { + std::cout << "[VAD] Max duration reached, forcing flush (" + << self->speech_samples_count_ / (self->sample_rate_ * self->channels_) << "s)" << std::endl; + + if (self->callback_ && self->speech_buffer_.size() >= static_cast(min_speech_samples)) { + // Flush any remaining samples from 
the denoiser + if (self->noise_reducer_ && self->noise_reducer_->isEnabled()) { + auto remaining = self->noise_reducer_->flushStream(); + self->speech_buffer_.insert(self->speech_buffer_.end(), + remaining.begin(), remaining.end()); + // Save preview for listening + self->noise_reducer_->savePreview(self->speech_buffer_); + } + self->callback_(self->speech_buffer_); + } + self->speech_buffer_.clear(); + self->speech_samples_count_ = 0; + // Reset stream for next segment + if (self->noise_reducer_) { + self->noise_reducer_->resetStream(); + } + } + } else { + // True silence (after hang time expired) + self->silence_samples_count_ += sample_count; + + // If we were speaking and now have enough silence, flush + if (self->speech_buffer_.size() > 0) { + // Add trailing silence (denoised) + if (!denoised_samples.empty()) { + self->speech_buffer_.insert(self->speech_buffer_.end(), + denoised_samples.begin(), denoised_samples.end()); + } else { + for (unsigned long i = 0; i < sample_count; ++i) { + self->speech_buffer_.push_back(in[i]); + } + } + + if (self->silence_samples_count_ >= silence_samples_threshold) { + self->is_speech_active_.store(false, std::memory_order_relaxed); + + // Flush if we have enough speech + if (self->speech_samples_count_ >= min_speech_samples) { + // Flush any remaining samples from the denoiser + if (self->noise_reducer_ && self->noise_reducer_->isEnabled()) { + auto remaining = self->noise_reducer_->flushStream(); + self->speech_buffer_.insert(self->speech_buffer_.end(), + remaining.begin(), remaining.end()); + // Save preview for listening + self->noise_reducer_->savePreview(self->speech_buffer_); + } + + float duration = static_cast(self->speech_buffer_.size()) / + (self->sample_rate_ * self->channels_); + std::cout << "[VAD] Speech ended (noise_floor=" << self->noise_floor_ + << "), flushing " << duration << "s (denoised)" << std::endl; + + if (self->callback_) { + self->callback_(self->speech_buffer_); + } + } else { + std::cout << "[VAD] 
Speech too short (" << self->speech_samples_count_ + << " samples), discarding" << std::endl; + } + + self->speech_buffer_.clear(); + self->speech_samples_count_ = 0; + // Reset stream for next segment + if (self->noise_reducer_) { + self->noise_reducer_->resetStream(); + } + } + } else { + self->is_speech_active_.store(false, std::memory_order_relaxed); } - // Sliding window: remove only step_samples, keep overlap for next chunk - self->buffer_.erase(self->buffer_.begin(), self->buffer_.begin() + step_samples); } return paContinue; @@ -71,7 +252,9 @@ bool AudioCapture::start(AudioCallback callback) { } callback_ = callback; - buffer_.clear(); + speech_buffer_.clear(); + silence_samples_count_ = 0; + speech_samples_count_ = 0; PaStreamParameters input_params; input_params.device = Pa_GetDefaultInputDevice(); @@ -88,7 +271,7 @@ bool AudioCapture::start(AudioCallback callback) { PaError err = Pa_OpenStream( &stream_, &input_params, - nullptr, // no output + nullptr, sample_rate_, paFramesPerBufferUnspecified, paClipOff, @@ -108,6 +291,9 @@ bool AudioCapture::start(AudioCallback callback) { } is_recording_ = true; + std::cout << "[VAD] Started with RMS threshold=" << vad_rms_threshold_ + << ", Peak threshold=" << vad_peak_threshold_ + << ", Silence duration=" << silence_duration_ms_ << "ms" << std::endl; return true; } @@ -116,8 +302,42 @@ void AudioCapture::stop() { return; } + // Flush any remaining audio + if (speech_buffer_.size() > 0 && callback_) { + int min_speech_samples = (min_speech_duration_ms_ * sample_rate_ * channels_) / 1000; + if (speech_samples_count_ >= min_speech_samples) { + std::cout << "[VAD] Flushing remaining audio on stop (denoised)" << std::endl; + + // Flush any remaining samples from the denoiser + if (noise_reducer_ && noise_reducer_->isEnabled()) { + auto remaining = noise_reducer_->flushStream(); + speech_buffer_.insert(speech_buffer_.end(), + remaining.begin(), remaining.end()); + // Save preview for listening + 
noise_reducer_->savePreview(speech_buffer_); + } + + callback_(speech_buffer_); + } + } + + // Reset the stream for next session + if (noise_reducer_) { + noise_reducer_->resetStream(); + } + Pa_StopStream(stream_); is_recording_ = false; } +void AudioCapture::setDenoiseEnabled(bool enabled) { + if (noise_reducer_) { + noise_reducer_->setEnabled(enabled); + } +} + +bool AudioCapture::isDenoiseEnabled() const { + return noise_reducer_ && noise_reducer_->isEnabled(); +} + } // namespace secondvoice diff --git a/src/audio/AudioCapture.h b/src/audio/AudioCapture.h index e21118f..af39627 100644 --- a/src/audio/AudioCapture.h +++ b/src/audio/AudioCapture.h @@ -3,16 +3,20 @@ #include #include #include +#include +#include +#include #include namespace secondvoice { +class NoiseReducer; + class AudioCapture { public: using AudioCallback = std::function&)>; - // chunk_duration = window size, chunk_step = how often to send (overlap = duration - step) - AudioCapture(int sample_rate, int channels, int chunk_duration_seconds, int chunk_step_seconds = 0); + AudioCapture(int sample_rate, int channels); ~AudioCapture(); bool initialize(); @@ -20,6 +24,26 @@ public: void stop(); bool isRecording() const { return is_recording_; } + // Real-time audio levels (updated every callback, ~10ms) + float getCurrentRMS() const { return current_rms_; } + float getCurrentPeak() const { return current_peak_; } + + // VAD settings (can be adjusted in real-time from UI) + void setVadThresholds(float rms_threshold, float peak_threshold) { + vad_rms_threshold_ = rms_threshold; + vad_peak_threshold_ = peak_threshold; + } + void setSilenceDuration(int ms) { silence_duration_ms_ = ms; } + void setMinSpeechDuration(int ms) { min_speech_duration_ms_ = ms; } + void setMaxSpeechDuration(int ms) { max_speech_duration_ms_ = ms; } + + // VAD state (for UI display) + bool isSpeechDetected() const { return is_speech_active_; } + + // Noise reduction + void setDenoiseEnabled(bool enabled); + bool 
isDenoiseEnabled() const; + private: static int audioCallback(const void* input, void* output, unsigned long frame_count, @@ -29,13 +53,45 @@ private: int sample_rate_; int channels_; - int chunk_duration_seconds_; - int chunk_step_seconds_; // How often to emit chunks (0 = same as duration, no overlap) bool is_recording_ = false; PaStream* stream_ = nullptr; AudioCallback callback_; - std::vector buffer_; + + // Speech accumulation buffer + std::vector speech_buffer_; + + // VAD state + std::atomic is_speech_active_{false}; + int silence_samples_count_ = 0; // How many samples of silence + int speech_samples_count_ = 0; // How many samples of speech accumulated + + // VAD parameters - Higher threshold to avoid false triggers on filtered noise + std::atomic vad_rms_threshold_{0.02f}; // Was 0.01f + std::atomic vad_peak_threshold_{0.08f}; // Was 0.04f + int silence_duration_ms_ = 400; // Wait 400ms of silence before cutting + int min_speech_duration_ms_ = 300; // Minimum speech to send + int max_speech_duration_ms_ = 25000; // 25s max before forced flush + + // Adaptive noise floor + float noise_floor_ = 0.005f; // Estimated background noise level + float noise_floor_alpha_ = 0.001f; // Slower adaptation + + // Hang time - wait before cutting to avoid mid-sentence cuts + int hang_frames_ = 0; + int hang_frames_threshold_ = 20; // ~200ms tolerance for pauses + + // Zero-crossing rate for speech vs noise discrimination + float last_zcr_ = 0.0f; + float speech_zcr_min_ = 0.01f; // More permissive lower bound + float speech_zcr_max_ = 0.25f; // More permissive upper bound + + // Real-time audio levels (atomic for thread safety) + std::atomic current_rms_{0.0f}; + std::atomic current_peak_{0.0f}; + + // Noise reduction + std::unique_ptr noise_reducer_; }; } // namespace secondvoice diff --git a/src/audio/NoiseReducer.cpp b/src/audio/NoiseReducer.cpp new file mode 100644 index 0000000..7acaca0 --- /dev/null +++ b/src/audio/NoiseReducer.cpp @@ -0,0 +1,249 @@ +#include 
"NoiseReducer.h" +#include "AudioBuffer.h" +#include +#include +#include +#include +#include + +namespace secondvoice { + +NoiseReducer::NoiseReducer() { + state_ = rnnoise_create(nullptr); + upsample_buffer_.resize(RNNOISE_FRAME_SIZE); + downsample_buffer_.resize(INPUT_FRAME_SIZE); +} + +NoiseReducer::~NoiseReducer() { + if (state_) { + rnnoise_destroy(state_); + } +} + +void NoiseReducer::upsample(const float* in, float* out, int in_samples) { + // Simple linear interpolation 16kHz -> 48kHz (3x) + for (int i = 0; i < in_samples; ++i) { + int out_idx = i * 3; + float curr = in[i]; + float next = (i + 1 < in_samples) ? in[i + 1] : curr; + + out[out_idx] = curr; + out[out_idx + 1] = curr + (next - curr) * 0.333f; + out[out_idx + 2] = curr + (next - curr) * 0.667f; + } +} + +void NoiseReducer::downsample(const float* in, float* out, int out_samples) { + // Simple decimation 48kHz -> 16kHz (take every 3rd sample with averaging) + for (int i = 0; i < out_samples; ++i) { + int in_idx = i * 3; + // Average of 3 samples for anti-aliasing + out[i] = (in[in_idx] + in[in_idx + 1] + in[in_idx + 2]) / 3.0f; + } +} + +void NoiseReducer::suppressTransients(float* samples, size_t count) { + // Transient suppressor: attenuates sudden spikes (claps, clicks, pops) + // while preserving speech which has smoother amplitude changes + + int transients_detected = 0; + + for (size_t i = 0; i < count; ++i) { + float abs_sample = std::abs(samples[i]); + + // IMPORTANT: Use envelope BEFORE updating for transient detection + // Otherwise the envelope rises with the clap and we miss it! 
+ float safe_envelope = std::max(envelope_, 0.001f); + float ratio = abs_sample / safe_envelope; + + bool is_transient = (ratio > transient_threshold_); + + if (is_transient) { + // This is a transient - attenuate it heavily + float target_amplitude = safe_envelope * transient_ratio_; + float attenuation = target_amplitude / abs_sample; + samples[i] *= attenuation; + transients_detected++; + + // DON'T update envelope from transients - keep it stable + // This prevents the envelope from "learning" the clap + } else { + // Only update envelope from non-transient samples + if (abs_sample > envelope_) { + envelope_ = envelope_ * (1.0f - envelope_attack_) + abs_sample * envelope_attack_; + } else { + envelope_ = envelope_ * (1.0f - envelope_release_) + abs_sample * envelope_release_; + } + } + } + + if (transients_detected > 10) { + std::cout << "[RNNoise] Suppressed " << transients_detected << " transient samples (clap/click)" << std::endl; + } +} + +void NoiseReducer::process(std::vector& samples) { + if (!enabled_ || !state_ || samples.empty()) { + return; + } + + // Process in chunks of INPUT_FRAME_SIZE (160 samples = 10ms at 16kHz) + size_t num_frames = samples.size() / INPUT_FRAME_SIZE; + float duration_ms = static_cast(samples.size()) / 16.0f; // 16 samples/ms at 16kHz + std::cout << "[RNNoise] Denoising " << duration_ms << "ms of audio (" << num_frames << " frames)" << std::endl; + + for (size_t frame = 0; frame < num_frames; ++frame) { + float* frame_ptr = samples.data() + frame * INPUT_FRAME_SIZE; + + // Upsample 16kHz -> 48kHz + upsample(frame_ptr, upsample_buffer_.data(), INPUT_FRAME_SIZE); + + // RNNoise expects float input in range [-32768, 32768] (like int16) + // Our input is [-1, 1], so scale up + for (int i = 0; i < RNNOISE_FRAME_SIZE; ++i) { + upsample_buffer_[i] *= 32768.0f; + } + + // Apply RNNoise + rnnoise_process_frame(state_, upsample_buffer_.data(), upsample_buffer_.data()); + + // Scale back to [-1, 1] + for (int i = 0; i < 
RNNOISE_FRAME_SIZE; ++i) { + upsample_buffer_[i] /= 32768.0f; + // Clamp to prevent any overflow + upsample_buffer_[i] = std::clamp(upsample_buffer_[i], -1.0f, 1.0f); + } + + // Downsample 48kHz -> 16kHz + downsample(upsample_buffer_.data(), frame_ptr, INPUT_FRAME_SIZE); + } + + // Handle remaining samples (less than a full frame) - left un-denoised in place + // (batch mode keeps no carry-over buffer, so the tail is NOT deferred to a later call) +} + +std::vector NoiseReducer::processCopy(const std::vector& samples) { + std::vector result = samples; + process(result); + return result; +} + +std::vector NoiseReducer::processRealtime(const float* samples, size_t count) { + std::vector output; + + if (!enabled_ || !state_ || count == 0) { + // Pass through unchanged + output.assign(samples, samples + count); + return output; + } + + // Add new samples to streaming buffer + stream_input_buffer_.insert(stream_input_buffer_.end(), samples, samples + count); + + // Process complete frames + while (stream_input_buffer_.size() >= INPUT_FRAME_SIZE) { + // Get frame pointer + float* frame_ptr = stream_input_buffer_.data(); + + // Step 1: Suppress transients (claps, clicks, pops) BEFORE RNNoise + suppressTransients(frame_ptr, INPUT_FRAME_SIZE); + + // Step 2: Upsample 16kHz -> 48kHz + upsample(frame_ptr, upsample_buffer_.data(), INPUT_FRAME_SIZE); + + // Scale to int16 range for RNNoise + for (int i = 0; i < RNNOISE_FRAME_SIZE; ++i) { + upsample_buffer_[i] *= 32768.0f; + } + + // Apply RNNoise + rnnoise_process_frame(state_, upsample_buffer_.data(), upsample_buffer_.data()); + + // Scale back to [-1, 1] + for (int i = 0; i < RNNOISE_FRAME_SIZE; ++i) { + upsample_buffer_[i] /= 32768.0f; + upsample_buffer_[i] = std::clamp(upsample_buffer_[i], -1.0f, 1.0f); + } + + // Downsample 48kHz -> 16kHz and add to output + std::vector denoised_frame(INPUT_FRAME_SIZE); + downsample(upsample_buffer_.data(), denoised_frame.data(), INPUT_FRAME_SIZE); + output.insert(output.end(), denoised_frame.begin(), denoised_frame.end()); + + // Remove 
processed samples from buffer + stream_input_buffer_.erase(stream_input_buffer_.begin(), + stream_input_buffer_.begin() + INPUT_FRAME_SIZE); + } + + return output; +} + +std::vector NoiseReducer::flushStream() { + std::vector output; + + if (!enabled_ || !state_ || stream_input_buffer_.empty()) { + // Return any remaining samples as-is + output = std::move(stream_input_buffer_); + stream_input_buffer_.clear(); + return output; + } + + // Process remaining samples (zero-pad to complete frame if needed) + while (!stream_input_buffer_.empty()) { + // Pad with zeros if we have less than a full frame + while (stream_input_buffer_.size() < INPUT_FRAME_SIZE) { + stream_input_buffer_.push_back(0.0f); + } + + // Get frame pointer + float* frame_ptr = stream_input_buffer_.data(); + + // Step 1: Suppress transients + suppressTransients(frame_ptr, INPUT_FRAME_SIZE); + + // Step 2: Upsample 16kHz -> 48kHz + upsample(frame_ptr, upsample_buffer_.data(), INPUT_FRAME_SIZE); + + // Scale to int16 range for RNNoise + for (int i = 0; i < RNNOISE_FRAME_SIZE; ++i) { + upsample_buffer_[i] *= 32768.0f; + } + + // Apply RNNoise + rnnoise_process_frame(state_, upsample_buffer_.data(), upsample_buffer_.data()); + + // Scale back to [-1, 1] + for (int i = 0; i < RNNOISE_FRAME_SIZE; ++i) { + upsample_buffer_[i] /= 32768.0f; + upsample_buffer_[i] = std::clamp(upsample_buffer_[i], -1.0f, 1.0f); + } + + // Downsample 48kHz -> 16kHz and add to output + std::vector denoised_frame(INPUT_FRAME_SIZE); + downsample(upsample_buffer_.data(), denoised_frame.data(), INPUT_FRAME_SIZE); + output.insert(output.end(), denoised_frame.begin(), denoised_frame.end()); + + // Remove processed samples from buffer + stream_input_buffer_.erase(stream_input_buffer_.begin(), + stream_input_buffer_.begin() + INPUT_FRAME_SIZE); + } + + return output; +} + +void NoiseReducer::savePreview(const std::vector& samples) { + if (!save_preview_ || samples.empty()) { + return; + } + + 
std::filesystem::create_directories("./denoised"); + std::string filename = "./denoised/denoised_" + std::to_string(preview_count_++) + ".ogg"; + + AudioBuffer buffer(16000, 1); // 16kHz mono + buffer.addSamples(samples); + if (buffer.saveToOpus(filename)) { + std::cout << "[RNNoise] Saved denoised preview: " << filename << std::endl; + } +} + +} // namespace secondvoice diff --git a/src/audio/NoiseReducer.h b/src/audio/NoiseReducer.h new file mode 100644 index 0000000..ee6b650 --- /dev/null +++ b/src/audio/NoiseReducer.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include + +// Forward declaration +struct DenoiseState; + +namespace secondvoice { + +/** + * NoiseReducer - Wrapper around RNNoise for real-time audio denoising + * + * RNNoise expects 48kHz audio, but we work at 16kHz. + * This wrapper handles resampling transparently. + */ +class NoiseReducer { +public: + NoiseReducer(); + ~NoiseReducer(); + + // Process audio samples in-place (batch mode - for final output) + // Input/output: float samples normalized to [-1, 1] + void process(std::vector& samples); + + // Process a buffer, returns denoised copy + std::vector processCopy(const std::vector& samples); + + // Real-time streaming: process samples as they come in + // Returns denoised samples (may be slightly delayed due to frame buffering) + std::vector processRealtime(const float* samples, size_t count); + + // Flush any remaining samples in the stream buffer (zero-padded if needed) + std::vector flushStream(); + + // Clear the stream buffer (call when starting a new segment) + void resetStream() { stream_input_buffer_.clear(); } + + // Enable/disable denoising + void setEnabled(bool enabled) { enabled_ = enabled; } + bool isEnabled() const { return enabled_; } + + // Save denoised audio for preview + void setSavePreview(bool save) { save_preview_ = save; } + bool isSavePreviewEnabled() const { return save_preview_; } + + // Save audio samples to Opus file (for preview) + void savePreview(const 
std::vector& samples); + +private: + DenoiseState* state_ = nullptr; + bool enabled_ = true; + bool save_preview_ = true; // Save denoised audio for listening + int preview_count_ = 0; + + // RNNoise frame size (480 samples at 48kHz = 10ms) + static constexpr int RNNOISE_FRAME_SIZE = 480; + + // Our frame size (160 samples at 16kHz = 10ms) + static constexpr int INPUT_FRAME_SIZE = 160; + + // Resampling buffers + std::vector upsample_buffer_; + std::vector downsample_buffer_; + + // Streaming input buffer (for real-time processing) + std::vector stream_input_buffer_; + + // Transient suppressor state + float envelope_ = 0.0f; // Smoothed signal envelope + float envelope_attack_ = 0.01f; // Fast attack to track signal + float envelope_release_ = 0.0005f; // Slow release to maintain level + float transient_threshold_ = 4.0f; // Spike must be 4x envelope to be considered transient + float transient_ratio_ = 0.1f; // Attenuate transients to 10% of the envelope (a 90% reduction) + + // Suppress transient sounds (claps, clicks, pops) + void suppressTransients(float* samples, size_t count); + + // Upsample 16kHz to 48kHz (3x) + void upsample(const float* in, float* out, int in_samples); + + // Downsample 48kHz to 16kHz (3x) + void downsample(const float* in, float* out, int out_samples); +}; + +} // namespace secondvoice diff --git a/src/core/Pipeline.cpp b/src/core/Pipeline.cpp index 788bb8e..2057322 100644 --- a/src/core/Pipeline.cpp +++ b/src/core/Pipeline.cpp @@ -24,18 +24,13 @@ Pipeline::~Pipeline() { bool Pipeline::initialize() { auto& config = Config::getInstance(); - // Initialize audio capture with sliding window (overlap) + // Initialize audio capture with VAD-based segmentation audio_capture_ = std::make_unique( config.getAudioConfig().sample_rate, - config.getAudioConfig().channels, - config.getAudioConfig().chunk_duration_seconds, - config.getAudioConfig().chunk_step_seconds + config.getAudioConfig().channels ); - std::cout << "[Pipeline] Audio: " << 
config.getAudioConfig().chunk_duration_seconds - << "s window, " << config.getAudioConfig().chunk_step_seconds - << "s step (overlap: " << (config.getAudioConfig().chunk_duration_seconds - config.getAudioConfig().chunk_step_seconds) - << "s)" << std::endl; + std::cout << "[Pipeline] VAD-based audio segmentation enabled" << std::endl; if (!audio_capture_->initialize()) { std::cerr << "Failed to initialize audio capture" << std::endl; @@ -76,7 +71,7 @@ bool Pipeline::start() { running_ = true; - // Start background threads (NOT UI - that runs in main thread) + // Start background threads audio_thread_ = std::thread(&Pipeline::audioThread, this); processing_thread_ = std::thread(&Pipeline::processingThread, this); @@ -95,9 +90,8 @@ void Pipeline::stop() { audio_capture_->stop(); } - // Shutdown queues + // Shutdown queue audio_queue_.shutdown(); - transcription_queue_.shutdown(); // Wait for background threads if (audio_thread_.joinable()) { @@ -146,7 +140,7 @@ void Pipeline::audioThread() { // Keep thread alive while recording auto start_time = std::chrono::steady_clock::now(); while (running_ && audio_capture_->isRecording()) { - std::this_thread::sleep_for(std::chrono::seconds(1)); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Update duration auto now = std::chrono::steady_clock::now(); @@ -157,11 +151,6 @@ void Pipeline::audioThread() { void Pipeline::processingThread() { auto& config = Config::getInstance(); - // VAD threshold - audio RMS must exceed this to be considered speech - // Higher values = more aggressive noise filtering - constexpr float VAD_THRESHOLD = 0.02f; // RMS energy threshold - constexpr float VAD_MIN_PEAK = 0.08f; // Minimum peak amplitude - while (running_) { auto chunk_opt = audio_queue_.wait_and_pop(); if (!chunk_opt.has_value()) { @@ -169,22 +158,8 @@ void Pipeline::processingThread() { } auto& chunk = chunk_opt.value(); - - // Voice Activity Detection - skip silent/noise chunks - float sum_squared = 0.0f; - float 
max_amplitude = 0.0f; - for (const float sample : chunk.data) { - sum_squared += sample * sample; - max_amplitude = std::max(max_amplitude, std::abs(sample)); - } - float rms = std::sqrt(sum_squared / chunk.data.size()); - - if (rms < VAD_THRESHOLD || max_amplitude < VAD_MIN_PEAK) { - std::cout << "[Skip] Silent chunk (RMS: " << rms << ", Peak: " << max_amplitude << ")" << std::endl; - continue; - } - - std::cout << "[Audio] Processing chunk (RMS: " << rms << ", Peak: " << max_amplitude << ")" << std::endl; + float duration = static_cast(chunk.data.size()) / (chunk.sample_rate * chunk.channels); + std::cout << "[Processing] Speech segment: " << duration << "s" << std::endl; // Transcribe with Whisper auto whisper_result = whisper_client_->transcribe( @@ -203,8 +178,9 @@ void Pipeline::processingThread() { continue; } - // Filter out Whisper hallucinations (common when audio is silent/unclear) + // Filter out Whisper hallucinations std::string text = whisper_result->text; + // Trim whitespace size_t start = text.find_first_not_of(" \t\n\r"); size_t end = text.find_last_not_of(" \t\n\r"); @@ -215,28 +191,61 @@ void Pipeline::processingThread() { text = text.substr(start, end - start + 1); // Skip known hallucinations and garbage + // Whisper hallucinates these when given silence/noise bool is_garbage = false; - - // Too short to be meaningful if (text.length() < 4) { is_garbage = true; - } - // Known Whisper hallucinations - else if (text.find("Amara.org") != std::string::npos || - text.find("amara.org") != std::string::npos || - text.find("字幕") != std::string::npos || - text.find("subtitle") != std::string::npos || - text.find("Subtitle") != std::string::npos || - text.find("Thank you") != std::string::npos || - text.find("thanks for watching") != std::string::npos || - text.find("Subscribe") != std::string::npos || - text.find("谢谢观看") != std::string::npos || - text.find("订阅") != std::string::npos || - text.find("...") == 0) { // Starts with ellipsis + } else if ( + 
// Common Whisper hallucinations + text.find("Tingting") != std::string::npos || + text.find("tingting") != std::string::npos || + text.find("婷婷") != std::string::npos || + text.find("Thank you") != std::string::npos || + text.find("thanks for watching") != std::string::npos || + text.find("Thanks for watching") != std::string::npos || + text.find("Subscribe") != std::string::npos || + text.find("subscribe") != std::string::npos || + text.find("請訂閱") != std::string::npos || + text.find("请订阅") != std::string::npos || + text.find("訂閱") != std::string::npos || + text.find("订阅") != std::string::npos || + text.find("Amara.org") != std::string::npos || + text.find("amara.org") != std::string::npos || + text.find("字幕") != std::string::npos || + text.find("subtitle") != std::string::npos || + text.find("Subtitle") != std::string::npos || + text.find("谢谢观看") != std::string::npos || + text.find("謝謝觀看") != std::string::npos || + text.find("感谢收看") != std::string::npos || + text.find("感謝收看") != std::string::npos || + text.find("下期再见") != std::string::npos || + text.find("下期再見") != std::string::npos || + text.find("再见") != std::string::npos || + text.find("再見") != std::string::npos || + text.find("music") != std::string::npos || + text.find("Music") != std::string::npos || + text.find("♪") != std::string::npos || + text.find("silenc") != std::string::npos || // silence, silencieux + text.find("Silenc") != std::string::npos || + text.find("...") == 0 || + // Repetitive single words/sounds + text == "Bonjour" || + text == "Hello" || + text == "Hi" || + text == "你好" || + text == "好" || + text == "嗯" || + text == "啊" || + text == "哈" || + text == "呵呵" || + text == "哈哈" || + text == "嘻嘻" || + text == "Hmm" || + text == "Huh" || + text == "Um" || + text == "Uh") { is_garbage = true; - } - // Only punctuation or whitespace - else { + } else { bool has_content = false; for (char c : text) { if (std::isalnum(static_cast(c)) || (c & 0x80)) { @@ -244,9 +253,7 @@ void 
Pipeline::processingThread() { break; } } - if (!has_content) { - is_garbage = true; - } + if (!has_content) is_garbage = true; } if (is_garbage) { @@ -254,20 +261,14 @@ void Pipeline::processingThread() { continue; } - // Remove overlap with previous transcription - std::string deduplicated = removeOverlap(text, last_transcription_); - last_transcription_ = text; // Store full text for next comparison - - if (deduplicated.empty() || deduplicated.length() < 2) { - std::cout << "[Skip] Fully overlapping transcription" << std::endl; - continue; + // Track audio cost + if (ui_) { + ui_->addAudioCost(duration); } - std::cout << "[New content] " << deduplicated << std::endl; - - // Translate with Claude (only the new, deduplicated content) + // Translate with Claude auto claude_result = claude_client_->translate( - deduplicated, + text, config.getClaudeConfig().system_prompt, config.getClaudeConfig().max_tokens, config.getClaudeConfig().temperature @@ -278,10 +279,27 @@ void Pipeline::processingThread() { continue; } - // Add to UI - ui_->addTranslation(deduplicated, claude_result->text); + // Track Claude cost + if (ui_) { + ui_->addClaudeCost(); + } - std::cout << "CN: " << deduplicated << std::endl; + // Simple accumulation + if (!accumulated_chinese_.empty()) { + accumulated_chinese_ += " "; + accumulated_french_ += " "; + } + accumulated_chinese_ += text; + accumulated_french_ += claude_result->text; + + std::cout << "[Accumulated CN] " << accumulated_chinese_ << std::endl; + std::cout << "[Accumulated FR] " << accumulated_french_ << std::endl; + + // Update UI + ui_->setAccumulatedText(accumulated_chinese_, accumulated_french_); + ui_->addTranslation(text, claude_result->text); + + std::cout << "CN: " << text << std::endl; std::cout << "FR: " << claude_result->text << std::endl; std::cout << "---" << std::endl; } @@ -290,58 +308,45 @@ void Pipeline::processingThread() { void Pipeline::update() { if (!ui_) return; + // Sync VAD thresholds from UI to AudioCapture + 
if (audio_capture_) { + audio_capture_->setVadThresholds( + ui_->getVadThreshold(), + ui_->getVadPeakThreshold() + ); + + // Update UI with audio levels + ui_->setCurrentRMS(audio_capture_->getCurrentRMS()); + ui_->setCurrentPeak(audio_capture_->getCurrentPeak()); + } + ui_->setRecordingDuration(recording_duration_); - ui_->setProcessingStatus("Processing..."); + ui_->setProcessingStatus(audio_capture_ && audio_capture_->isSpeechDetected() ? "Recording speech..." : "Listening..."); ui_->render(); // Check if stop was requested if (ui_->isStopRequested()) { running_ = false; } + + // Check if clear was requested + if (ui_->isClearRequested()) { + clearAccumulated(); + ui_->resetClearRequest(); + } } bool Pipeline::shouldClose() const { return ui_ ? ui_->shouldClose() : true; } -std::string Pipeline::removeOverlap(const std::string& current, const std::string& previous) { - if (previous.empty()) { - return current; +void Pipeline::clearAccumulated() { + accumulated_chinese_.clear(); + accumulated_french_.clear(); + if (ui_) { + ui_->setAccumulatedText("", ""); } - - // Try to find overlap: check if current starts with end of previous - // Start from half the previous text (since we have 50% overlap) - size_t min_overlap = 4; // Minimum characters to consider as overlap - size_t search_start = previous.length() / 3; // Start looking from 1/3 into previous - - for (size_t i = search_start; i < previous.length() - min_overlap; ++i) { - std::string suffix = previous.substr(i); - if (current.find(suffix) == 0) { - // Found overlap - return only the new part - std::string new_part = current.substr(suffix.length()); - if (!new_part.empty()) { - std::cout << "[Overlap] Removed: \"" << suffix << "\"" << std::endl; - return new_part; - } - } - } - - // No overlap found - check for partial word overlap at start - // This handles cases where the same content appears but tokenized differently - size_t max_check = std::min(current.length(), previous.length()) / 2; - for (size_t 
len = max_check; len >= min_overlap; --len) { - std::string end_prev = previous.substr(previous.length() - len); - std::string start_curr = current.substr(0, len); - if (end_prev == start_curr) { - std::string new_part = current.substr(len); - if (!new_part.empty()) { - std::cout << "[Overlap] Partial match removed: \"" << end_prev << "\"" << std::endl; - return new_part; - } - } - } - - return current; // No overlap detected + std::cout << "[Pipeline] Cleared accumulated text" << std::endl; } } // namespace secondvoice diff --git a/src/core/Pipeline.h b/src/core/Pipeline.h index 6647879..d32271a 100644 --- a/src/core/Pipeline.h +++ b/src/core/Pipeline.h @@ -21,10 +21,6 @@ struct AudioChunk { int channels; }; -struct TranscriptionResult { - std::string chinese_text; -}; - class Pipeline { public: Pipeline(); @@ -40,6 +36,9 @@ public: bool isRunning() const { return running_; } bool shouldClose() const; + // Clear accumulated translations + void clearAccumulated(); + private: void audioThread(); void processingThread(); @@ -51,7 +50,6 @@ private: std::unique_ptr full_recording_; ThreadSafeQueue audio_queue_; - ThreadSafeQueue transcription_queue_; std::thread audio_thread_; std::thread processing_thread_; @@ -59,9 +57,9 @@ private: std::atomic running_{false}; std::atomic recording_duration_{0}; - // For overlap deduplication - std::string last_transcription_; - std::string removeOverlap(const std::string& current, const std::string& previous); + // Simple accumulation + std::string accumulated_chinese_; + std::string accumulated_french_; }; } // namespace secondvoice diff --git a/src/ui/TranslationUI.cpp b/src/ui/TranslationUI.cpp index 5a61589..2004d25 100644 --- a/src/ui/TranslationUI.cpp +++ b/src/ui/TranslationUI.cpp @@ -199,10 +199,31 @@ void TranslationUI::render() { ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoCollapse); + // Two-column layout: main content on left, audio panel on right + float audio_panel_width = 280.0f; + float main_width = 
ImGui::GetWindowWidth() - audio_panel_width - 20.0f; + + // Left column - main content + ImGui::BeginChild("MainContent", ImVec2(main_width, -1), false); + + ImGui::Text("SecondVoice - Live Translation"); + ImGui::Separator(); + ImGui::Spacing(); + + renderAccumulated(); renderTranslations(); renderControls(); renderStatus(); + ImGui::EndChild(); + + ImGui::SameLine(); + + // Right column - audio panel + ImGui::BeginChild("AudioPanel", ImVec2(audio_panel_width, -1), true); + renderAudioPanel(); + ImGui::EndChild(); + ImGui::End(); // Rendering @@ -225,11 +246,49 @@ void TranslationUI::addTranslation(const std::string& chinese, const std::string messages_.push_back({chinese, french}); } -void TranslationUI::renderTranslations() { - ImGui::Text("SecondVoice - Live Translation"); +void TranslationUI::setAccumulatedText(const std::string& chinese, const std::string& french) { + accumulated_chinese_ = chinese; + accumulated_french_ = french; +} + +void TranslationUI::renderAccumulated() { + if (accumulated_chinese_.empty() && accumulated_french_.empty()) { + return; + } + + ImGui::Text("Texte Recomposé"); + ImGui::SameLine(); + if (ImGui::SmallButton("Clear")) { + clear_requested_ = true; + } ImGui::Separator(); - ImGui::BeginChild("Translations", ImVec2(0, -120), true); + ImGui::BeginChild("Accumulated", ImVec2(0, 150), true); + + // Chinese accumulated text + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1.0f, 0.9f, 0.5f, 1.0f)); // Yellow + ImGui::TextWrapped("%s", accumulated_chinese_.c_str()); + ImGui::PopStyleColor(); + + ImGui::Spacing(); + + // French accumulated text + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.5f, 1.0f, 0.8f, 1.0f)); // Cyan + ImGui::TextWrapped("%s", accumulated_french_.c_str()); + ImGui::PopStyleColor(); + + ImGui::EndChild(); + + ImGui::Spacing(); +} + +void TranslationUI::renderTranslations() { + ImGui::Text("Segments (détail)"); + ImGui::Separator(); + + // Height depends on whether accumulated section is shown + float height = 
(accumulated_chinese_.empty() && accumulated_french_.empty()) ? -120 : -120; + ImGui::BeginChild("Translations", ImVec2(0, height), true); for (const auto& msg : messages_) { ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.5f, 0.8f, 1.0f, 1.0f)); @@ -281,4 +340,101 @@ void TranslationUI::renderStatus() { } } +void TranslationUI::renderAudioPanel() { + ImGui::Text("Audio Monitor"); + ImGui::Separator(); + ImGui::Spacing(); + + // Current volume display + ImGui::Text("Volume Level"); + + // RMS meter (horizontal bar) + float rms_normalized = std::min(current_rms_ * 10.0f, 1.0f); // Scale for visibility + ImVec4 rms_color = rms_normalized > vad_threshold_ * 10.0f ? + ImVec4(0.2f, 0.8f, 0.2f, 1.0f) : // Green if above threshold + ImVec4(0.5f, 0.5f, 0.5f, 1.0f); // Gray if below + + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, rms_color); + ImGui::ProgressBar(rms_normalized, ImVec2(-1, 20), ""); + ImGui::PopStyleColor(); + + ImGui::Text("RMS: %.4f", current_rms_); + + ImGui::Spacing(); + + // Peak meter + ImGui::Text("Peak Level"); + float peak_normalized = std::min(current_peak_, 1.0f); + ImVec4 peak_color = peak_normalized > vad_peak_threshold_ ? 
+ ImVec4(0.2f, 0.8f, 0.2f, 1.0f) : + ImVec4(0.5f, 0.5f, 0.5f, 1.0f); + + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, peak_color); + ImGui::ProgressBar(peak_normalized, ImVec2(-1, 20), ""); + ImGui::PopStyleColor(); + + ImGui::Text("Peak: %.4f", current_peak_); + + ImGui::Spacing(); + ImGui::Separator(); + ImGui::Spacing(); + + // VAD Threshold sliders + ImGui::Text("VAD Settings"); + ImGui::Spacing(); + + ImGui::Text("RMS Threshold"); + ImGui::SliderFloat("##rms_thresh", &vad_threshold_, 0.001f, 0.1f, "%.3f"); + + // Draw threshold line on the RMS meter area + ImGui::Spacing(); + + ImGui::Text("Peak Threshold"); + ImGui::SliderFloat("##peak_thresh", &vad_peak_threshold_, 0.01f, 0.3f, "%.3f"); + + ImGui::Spacing(); + ImGui::Separator(); + ImGui::Spacing(); + + // Status indicator + bool is_active = (current_rms_ >= vad_threshold_) && (current_peak_ >= vad_peak_threshold_); + if (is_active) { + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.2f, 1.0f, 0.2f, 1.0f)); + ImGui::Text(">> VOICE DETECTED <<"); + ImGui::PopStyleColor(); + } else { + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.6f, 0.6f, 0.6f, 1.0f)); + ImGui::Text(" (silence)"); + ImGui::PopStyleColor(); + } + + ImGui::Spacing(); + ImGui::Separator(); + ImGui::Spacing(); + + // Cost tracking + ImGui::Text("API Usage"); + ImGui::Spacing(); + + // Audio sent to Whisper + int minutes = static_cast(total_audio_seconds_) / 60; + int seconds = static_cast(total_audio_seconds_) % 60; + ImGui::Text("Audio: %d:%02d", minutes, seconds); + ImGui::Text("Whisper calls: %d", whisper_calls_); + ImGui::Text("Claude calls: %d", claude_calls_); + + ImGui::Spacing(); + + // Estimated cost + float cost = getEstimatedCost(); + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1.0f, 0.8f, 0.3f, 1.0f)); // Gold + ImGui::Text("Est. 
cost: $%.4f", cost); + ImGui::PopStyleColor(); + + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.6f, 0.6f, 0.6f, 1.0f)); + ImGui::Text("(Whisper $0.006/min)"); + ImGui::Text("(Haiku ~$0.001/call)"); + ImGui::PopStyleColor(); +} + } // namespace secondvoice diff --git a/src/ui/TranslationUI.h b/src/ui/TranslationUI.h index 8b97f0d..cb6145e 100644 --- a/src/ui/TranslationUI.h +++ b/src/ui/TranslationUI.h @@ -23,27 +23,66 @@ public: bool shouldClose() const; void addTranslation(const std::string& chinese, const std::string& french); + void setAccumulatedText(const std::string& chinese, const std::string& french); bool isStopRequested() const { return stop_requested_; } void resetStopRequest() { stop_requested_ = false; } + bool isClearRequested() const { return clear_requested_; } + void resetClearRequest() { clear_requested_ = false; } + void setRecordingDuration(int seconds) { recording_duration_ = seconds; } void setProcessingStatus(const std::string& status) { processing_status_ = status; } + // Audio level monitoring + void setCurrentRMS(float rms) { current_rms_ = rms; } + void setCurrentPeak(float peak) { current_peak_ = peak; } + float getVadThreshold() const { return vad_threshold_; } + float getVadPeakThreshold() const { return vad_peak_threshold_; } + private: + void renderAccumulated(); void renderTranslations(); void renderControls(); void renderStatus(); + void renderAudioPanel(); int width_; int height_; GLFWwindow* window_ = nullptr; std::vector messages_; + std::string accumulated_chinese_; + std::string accumulated_french_; bool stop_requested_ = false; + bool clear_requested_ = false; bool auto_scroll_ = true; int recording_duration_ = 0; std::string processing_status_; + + // Audio monitoring + float current_rms_ = 0.0f; + float current_peak_ = 0.0f; + float vad_threshold_ = 0.02f; // 2x higher to avoid false triggers + float vad_peak_threshold_ = 0.08f; // 2x higher + + // Cost tracking + float total_audio_seconds_ = 0.0f; + int whisper_calls_ = 
0; + int claude_calls_ = 0; + +public: + void addAudioCost(float seconds) { + total_audio_seconds_ += seconds; + whisper_calls_++; + } + void addClaudeCost() { claude_calls_++; } + float getTotalAudioSeconds() const { return total_audio_seconds_; } + float getEstimatedCost() const { + // gpt-4o-mini-transcribe: $0.006/min = $0.0001/sec + // Haiku: ~$0.001 per short translation + return (total_audio_seconds_ * 0.0001f) + (claude_calls_ * 0.001f); + } }; } // namespace secondvoice