From 9baa213a820c6f30493465c95d23d395adb378bd Mon Sep 17 00:00:00 2001
From: Trouve Alexis <Alexistrouve.pro@gmail.com>
Date: Sun, 23 Nov 2025 21:37:55 +0800
Subject: [PATCH] feat: Add session logging system with per-segment metrics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add SessionLogger class for structured debug logging
- Log each segment with: chinese, french, audio duration, RMS, latency
- Track filtered segments with reasons (hallucination, empty, failed)
- Create session directories with JSON files per segment
- Update Whisper prompt with anti-hallucination rules
- Integrate timing measurements for Whisper and Claude calls

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .gitignore                  |   1 +
 CMakeLists.txt              |   1 +
 config.json                 |   2 +-
 src/core/Pipeline.cpp       |  52 +++++++++-
 src/core/Pipeline.h         |   5 +
 src/utils/SessionLogger.cpp | 196 ++++++++++++++++++++++++++++++++++++
 src/utils/SessionLogger.h   |  63 ++++++++++++
 7 files changed, 318 insertions(+), 2 deletions(-)
 create mode 100644 src/utils/SessionLogger.cpp
 create mode 100644 src/utils/SessionLogger.h
diff --git a/.gitignore b/.gitignore
index ba38cc6..afa2b51 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,6 +64,7 @@ imgui.ini
 *.aac
 *.m4a
 denoised/
+sessions/
 
 # Claude Code local settings
 .claude/settings.local.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 071193d..a202a59 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,6 +108,7 @@ set(SOURCES_UI
     src/ui/TranslationUI.cpp
     # Utils
     src/utils/Config.cpp
+    src/utils/SessionLogger.cpp
     # Core
     src/core/Pipeline.cpp
 )
diff --git a/config.json b/config.json
index c0edd1a..b9115f1 100644
--- a/config.json
+++ b/config.json
@@ -10,7 +10,7 @@
     "model": "gpt-4o-mini-transcribe",
     "language": "zh",
     "temperature": 0.0,
-    "prompt": "The following is a conversation in Mandarin Chinese about business, family, and daily life. Common names: Tingting, Alexis.",
+    "prompt": "Transcription en direct d'une conversation en chinois mandarin. Plusieurs interlocuteurs parlent, parfois en même temps. RÈGLES STRICTES: (1) Ne transcris QUE les paroles audibles en chinois. (2) Si l'audio est inaudible, du bruit, ou du silence, renvoie une chaîne vide. (3) NE GÉNÈRE JAMAIS ces phrases: 谢谢观看, 感谢收看, 订阅, 请订阅, 下期再见, Thank you, Subscribe, 字幕. (4) Ignore: musique, applaudissements, rires, bruits de fond, respirations.",
     "stream": false,
     "response_format": "text"
   },
diff --git a/src/core/Pipeline.cpp b/src/core/Pipeline.cpp
index 7d621c2..08a02c6 100644
--- a/src/core/Pipeline.cpp
+++ b/src/core/Pipeline.cpp
@@ -70,6 +70,10 @@ bool Pipeline::start() {
     }
 
     running_ = true;
+    segment_id_ = 0;
+
+    // Start session logging
+    session_logger_.startSession();
 
     // Start background threads
     audio_thread_ = std::thread(&Pipeline::audioThread, this);
@@ -126,6 +130,9 @@ void Pipeline::stop() {
         transcript_ss << "transcripts/transcript_" << timestamp.str() << ".txt";
         ui_->exportTranscript(transcript_ss.str());
     }
+
+    // End session logging
+    session_logger_.endSession();
 }
 
 void Pipeline::audioThread() {
@@ -168,7 +175,19 @@ void Pipeline::processingThread() {
 
         auto& chunk = chunk_opt.value();
         float duration = static_cast<float>(chunk.data.size()) / (chunk.sample_rate * chunk.channels);
-        std::cout << "[Processing] Speech segment: " << duration << "s" << std::endl;
+
+        // Calculate audio RMS for logging
+        float audio_rms = 0.0f;
+        if (!chunk.data.empty()) {
+            float sum_sq = 0.0f;
+            for (float s : chunk.data) sum_sq += s * s;
+            audio_rms = std::sqrt(sum_sq / chunk.data.size());
+        }
+
+        std::cout << "[Processing] Speech segment: " << duration << "s (RMS=" << audio_rms << ")" << std::endl;
+
+        // Time Whisper
+        auto whisper_start = std::chrono::steady_clock::now();
 
         // Transcribe with Whisper
         auto whisper_result = whisper_client_->transcribe(
@@ -182,8 +201,13 @@ void Pipeline::processingThread() {
             config.getWhisperConfig().response_format
         );
 
+        auto whisper_end = std::chrono::steady_clock::now();
+        int64_t whisper_latency = std::chrono::duration_cast<std::chrono::milliseconds>(
+            whisper_end - whisper_start).count();
+
         if (!whisper_result.has_value()) {
             std::cerr << "Whisper transcription failed" << std::endl;
+            session_logger_.logFilteredSegment("", "whisper_failed", duration, audio_rms);
             continue;
         }
 
@@ -195,6 +219,7 @@ void Pipeline::processingThread() {
         size_t end = text.find_last_not_of(" \t\n\r");
         if (start == std::string::npos) {
             std::cout << "[Skip] Empty transcription" << std::endl;
+            session_logger_.logFilteredSegment("", "empty", duration, audio_rms);
             continue;
         }
         text = text.substr(start, end - start + 1);
@@ -267,6 +292,7 @@ void Pipeline::processingThread() {
 
         if (is_garbage) {
             std::cout << "[Skip] Filtered: " << text << std::endl;
+            session_logger_.logFilteredSegment(text, "hallucination", duration, audio_rms);
             continue;
         }
 
@@ -275,6 +301,9 @@ void Pipeline::processingThread() {
             ui_->addAudioCost(duration);
         }
 
+        // Time Claude
+        auto claude_start = std::chrono::steady_clock::now();
+
         // Translate with Claude
         auto claude_result = claude_client_->translate(
             text,
@@ -283,8 +312,13 @@ void Pipeline::processingThread() {
             config.getClaudeConfig().temperature
         );
 
+        auto claude_end = std::chrono::steady_clock::now();
+        int64_t claude_latency = std::chrono::duration_cast<std::chrono::milliseconds>(
+            claude_end - claude_start).count();
+
         if (!claude_result.has_value()) {
             std::cerr << "Claude translation failed" << std::endl;
+            session_logger_.logFilteredSegment(text, "claude_failed", duration, audio_rms);
             continue;
         }
 
@@ -308,8 +342,24 @@ void Pipeline::processingThread() {
         ui_->setAccumulatedText(accumulated_chinese_, accumulated_french_);
         ui_->addTranslation(text, claude_result->text);
 
+        // Log successful segment
+        segment_id_++;
+        SegmentLog seg;
+        seg.id = segment_id_;
+        seg.chinese = text;
+        seg.french = claude_result->text;
+        seg.audio_duration_sec = duration;
+        seg.audio_rms = audio_rms;
+        seg.whisper_latency_ms = whisper_latency;
+        seg.claude_latency_ms = claude_latency;
+        seg.was_filtered = false;
+        seg.filter_reason = "";
+        seg.timestamp = "";  // Will be set by logger
+        session_logger_.logSegment(seg);
+
         std::cout << "CN: " << text << std::endl;
         std::cout << "FR: " << claude_result->text << std::endl;
+        std::cout << "[Latency] Whisper: " << whisper_latency << "ms, Claude: " << claude_latency << "ms" << std::endl;
         std::cout << "---" << std::endl;
     }
 }
diff --git a/src/core/Pipeline.h b/src/core/Pipeline.h
index d32271a..42a62f7 100644
--- a/src/core/Pipeline.h
+++ b/src/core/Pipeline.h
@@ -6,6 +6,7 @@
 #include <string>
 #include <vector>
 #include "../utils/ThreadSafeQueue.h"
+#include "../utils/SessionLogger.h"
 
 namespace secondvoice {
 
@@ -60,6 +61,10 @@ private:
     // Simple accumulation
     std::string accumulated_chinese_;
     std::string accumulated_french_;
+
+    // Session logging
+    SessionLogger session_logger_;
+    int segment_id_ = 0;
 };
 
 } // namespace secondvoice
diff --git a/src/utils/SessionLogger.cpp b/src/utils/SessionLogger.cpp
new file mode 100644
index 0000000..ed371c0
--- /dev/null
+++ b/src/utils/SessionLogger.cpp
@@ -0,0 +1,196 @@
+#include "SessionLogger.h"
+#include <nlohmann/json.hpp>
+#include <filesystem>
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+
+namespace secondvoice {
+
+using json = nlohmann::json;
+
+SessionLogger::SessionLogger() = default;  // members rely on their in-class initializers
+
+SessionLogger::~SessionLogger() {
+    // Finalize an open session; a destructor must not throw, so report and swallow.
+    try { endSession(); }  // no-op when no session is active
+    catch (...) { std::cerr << "[Session] Failed to finalize session" << std::endl; }
+}
+
+std::string SessionLogger::getCurrentTimestamp() const {
+    // Local wall-clock time as "YYYY-MM-DD_HHMMSS" (second resolution).
+    const auto now_c = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+    const auto* local_tm = std::localtime(&now_c);  // shared static buffer; null on failure
+    if (local_tm == nullptr) return "unknown-time";  // never hand a null tm to put_time
+
+    std::stringstream ss;
+    ss << std::put_time(local_tm, "%Y-%m-%d_%H%M%S");
+    return ss.str();
+}
+
+void SessionLogger::startSession() {
+    if (is_active_) endSession();  // flush any session still in progress
+    session_start_time_ = getCurrentTimestamp();
+    session_path_ = "./sessions/" + session_start_time_;
+
+    // Logging is best-effort: use the non-throwing overload so a directory failure
+    // leaves the logger inactive instead of throwing into the caller.
+    std::error_code ec;
+    std::filesystem::create_directories(session_path_ + "/segments", ec);
+    if (ec) {
+        std::cerr << "[Session] Cannot create " << session_path_ << ": " << ec.message() << std::endl;
+        return;
+    }
+
+    segment_count_ = filtered_count_ = 0;
+    total_audio_sec_ = 0.0f;
+    total_whisper_ms_ = total_claude_ms_ = 0;
+    segments_.clear();
+    is_active_ = true;
+    std::cout << "[Session] Started: " << session_path_ << std::endl;
+}
+
+void SessionLogger::endSession() {
+    // Only an open session has anything to flush.
+    if (is_active_) {
+        // Write session.json and transcript.txt first, then mark the logger idle.
+        writeSessionJson();
+        is_active_ = false;
+        std::cout << "[Session] Ended: " << segment_count_ << " segments, " << filtered_count_
+                  << " filtered, " << total_audio_sec_ << "s audio" << std::endl;
+    }
+}
+
+void SessionLogger::logSegment(const SegmentLog& segment) {
+    if (!is_active_) return;
+
+    // Callers may leave the timestamp blank; stamp a copy so `segment` stays untouched.
+    SegmentLog entry = segment;
+    if (entry.timestamp.empty()) entry.timestamp = getCurrentTimestamp();
+
+    // Update the running totals and keep the record for the session summary.
+    segment_count_++;
+    total_audio_sec_ += entry.audio_duration_sec;
+    total_whisper_ms_ += entry.whisper_latency_ms;
+    total_claude_ms_ += entry.claude_latency_ms;
+    segments_.push_back(entry);
+
+    // Per-segment file: <session>/segments/NNN.json (id zero-padded to at least 3 digits).
+    std::stringstream filename;
+    filename << session_path_ << "/segments/"
+             << std::setfill('0') << std::setw(3) << entry.id << ".json";
+
+    json j;
+    j["id"] = entry.id;
+    j["chinese"] = entry.chinese;
+    j["french"] = entry.french;
+    j["audio_duration_sec"] = entry.audio_duration_sec;
+    j["audio_rms"] = entry.audio_rms;
+    j["whisper_latency_ms"] = entry.whisper_latency_ms;
+    j["claude_latency_ms"] = entry.claude_latency_ms;
+    j["was_filtered"] = entry.was_filtered;
+    j["filter_reason"] = entry.filter_reason;
+    j["timestamp"] = entry.timestamp;
+
+    std::ofstream file(filename.str());
+    if (file.is_open()) file << j.dump(2);
+    else std::cerr << "[Session] Could not write " << filename.str() << std::endl;
+
+    std::cout << "[Session] Logged segment #" << entry.id
+              << " (" << entry.audio_duration_sec << "s)" << std::endl;
+}
+
+void SessionLogger::logFilteredSegment(const std::string& chinese, const std::string& reason,
+                                       float audio_duration, float audio_rms) {
+    if (!is_active_) return;
+
+    filtered_count_++;
+    total_audio_sec_ += audio_duration;  // rejected audio still counts toward the total
+
+    // Build the in-memory record kept for session.json.
+    // NOTE(review): this id comes from internal counters while logSegment() uses the
+    // caller-supplied id, so the two sequences can overlap - confirm that is acceptable.
+    SegmentLog seg;
+    seg.id = segment_count_ + filtered_count_;
+    seg.chinese = chinese;
+    seg.french = "[FILTERED]";
+    seg.audio_duration_sec = audio_duration;
+    seg.audio_rms = audio_rms;
+    seg.whisper_latency_ms = 0;
+    seg.claude_latency_ms = 0;
+    seg.was_filtered = true;
+    seg.filter_reason = reason;
+    seg.timestamp = getCurrentTimestamp();
+
+    segments_.push_back(seg);
+
+    // Per-segment file: <session>/segments/NNN_filtered.json
+    std::stringstream filename;
+    filename << session_path_ << "/segments/"
+             << std::setfill('0') << std::setw(3) << seg.id << "_filtered.json";
+
+    json j;
+    j["id"] = seg.id;
+    j["chinese"] = seg.chinese;
+    j["filter_reason"] = reason;
+    j["audio_duration_sec"] = audio_duration;
+    j["audio_rms"] = audio_rms;
+    j["timestamp"] = seg.timestamp;
+
+    std::ofstream file(filename.str());
+    if (file.is_open()) file << j.dump(2);
+    else std::cerr << "[Session] Could not write " << filename.str() << std::endl;
+}
+
+void SessionLogger::writeSessionJson() {
+    // Mean latencies are taken over successful segments only (integer division);
+    // with no successful segment they are reported as 0.
+    const bool has_segments = segment_count_ > 0;
+    const auto avg_whisper_ms = has_segments ? total_whisper_ms_ / segment_count_ : 0;
+    const auto avg_claude_ms = has_segments ? total_claude_ms_ / segment_count_ : 0;
+
+    json summary;
+    summary["start_time"] = session_start_time_;
+    summary["end_time"] = getCurrentTimestamp();
+    summary["total_segments"] = segment_count_;
+    summary["filtered_segments"] = filtered_count_;
+    summary["total_audio_seconds"] = total_audio_sec_;
+    summary["avg_whisper_latency_ms"] = avg_whisper_ms;
+    summary["avg_claude_latency_ms"] = avg_claude_ms;
+
+    // One compact entry per logged segment, filtered ones included.
+    summary["segments"] = json::array();
+    for (const SegmentLog& record : segments_) {
+        json entry;
+        entry["id"] = record.id;
+        entry["chinese"] = record.chinese;
+        entry["french"] = record.french;
+        entry["duration"] = record.audio_duration_sec;
+        entry["filtered"] = record.was_filtered;
+        if (record.was_filtered) entry["filter_reason"] = record.filter_reason;
+        summary["segments"].push_back(entry);
+    }
+
+    // session.json: the machine-readable summary.
+    const std::string json_path = session_path_ + "/session.json";
+    std::ofstream json_out(json_path);
+    if (json_out.is_open()) {
+        json_out << summary.dump(2);
+        json_out.close();
+        std::cout << "[Session] Wrote " << json_path << std::endl;
+    }
+
+    // transcript.txt: human-readable CN/FR pairs, skipping filtered segments.
+    std::ofstream text_out(session_path_ + "/transcript.txt");
+    if (!text_out.is_open()) return;
+
+    text_out << "=== SecondVoice Session " << session_start_time_ << " ===\n\n";
+    for (const SegmentLog& record : segments_) {
+        if (record.was_filtered) continue;
+        text_out << "CN: " << record.chinese << "\n";
+        text_out << "FR: " << record.french << "\n\n";
+    }
+    text_out.close();
+}
+
+} // namespace secondvoice
diff --git a/src/utils/SessionLogger.h b/src/utils/SessionLogger.h
new file mode 100644
index 0000000..c999967
--- /dev/null
+++ b/src/utils/SessionLogger.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <chrono>
+#include <fstream>
+
+namespace secondvoice {
+
+struct SegmentLog {  // One audio segment as recorded by SessionLogger.
+    int id = 0;                        // segment number; also used in the per-segment file name
+    std::string chinese;               // transcribed source text (may be empty)
+    std::string french;                // translated text, or a placeholder for filtered segments
+    float audio_duration_sec = 0.0f;
+    float audio_rms = 0.0f;
+    int64_t whisper_latency_ms = 0;
+    int64_t claude_latency_ms = 0;
+    bool was_filtered = false;
+    std::string filter_reason;         // why the segment was dropped; empty when not filtered
+    std::string timestamp;             // "YYYY-MM-DD_HHMMSS" local time
+};
+
+class SessionLogger {  // Writes per-session debug logs under ./sessions/<timestamp>/.
+public:
+    SessionLogger();
+    ~SessionLogger();  // ends the session if one is still active
+
+    // Start a new session (creates the session directory; ends any active session first)
+    void startSession();
+
+    // End session (writes session.json summary and transcript.txt); no-op if inactive
+    void endSession();
+
+    // Log a successfully processed segment; ignored while no session is active
+    void logSegment(const SegmentLog& segment);
+
+    // Log a filtered/skipped segment together with the reason it was dropped
+    void logFilteredSegment(const std::string& chinese, const std::string& reason,
+                           float audio_duration, float audio_rms);
+
+    // Get current session path (empty until the first startSession())
+    std::string getSessionPath() const { return session_path_; }
+
+    // Check if session is active
+    bool isActive() const { return is_active_; }
+
+private:
+    std::string getCurrentTimestamp() const;
+    void writeSessionJson();
+
+    bool is_active_ = false;
+    std::string session_path_;
+    std::string session_start_time_;
+    int segment_count_ = 0;
+    int filtered_count_ = 0;
+    float total_audio_sec_ = 0.0f;
+    int64_t total_whisper_ms_ = 0;  // 64-bit to match SegmentLog's latency fields
+    int64_t total_claude_ms_ = 0;
+
+    std::vector<SegmentLog> segments_;
+};
+
+} // namespace secondvoice