From 9baa213a820c6f30493465c95d23d395adb378bd Mon Sep 17 00:00:00 2001 From: Trouve Alexis Date: Sun, 23 Nov 2025 21:37:55 +0800 Subject: [PATCH] feat: Add session logging system with per-segment metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add SessionLogger class for structured debug logging - Log each segment with: chinese, french, audio duration, RMS, latency - Track filtered segments with reasons (hallucination, empty, failed) - Create session directories with JSON files per segment - Update Whisper prompt with anti-hallucination rules - Integrate timing measurements for Whisper and Claude calls 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .gitignore | 1 + CMakeLists.txt | 1 + config.json | 2 +- src/core/Pipeline.cpp | 52 +++++++++- src/core/Pipeline.h | 5 + src/utils/SessionLogger.cpp | 196 ++++++++++++++++++++++++++++++++++++ src/utils/SessionLogger.h | 63 ++++++++++++ 7 files changed, 318 insertions(+), 2 deletions(-) create mode 100644 src/utils/SessionLogger.cpp create mode 100644 src/utils/SessionLogger.h diff --git a/.gitignore b/.gitignore index ba38cc6..afa2b51 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,7 @@ imgui.ini *.aac *.m4a denoised/ +sessions/ # Claude Code local settings .claude/settings.local.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 071193d..a202a59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,6 +108,7 @@ set(SOURCES_UI src/ui/TranslationUI.cpp # Utils src/utils/Config.cpp + src/utils/SessionLogger.cpp # Core src/core/Pipeline.cpp ) diff --git a/config.json b/config.json index c0edd1a..b9115f1 100644 --- a/config.json +++ b/config.json @@ -10,7 +10,7 @@ "model": "gpt-4o-mini-transcribe", "language": "zh", "temperature": 0.0, - "prompt": "The following is a conversation in Mandarin Chinese about business, family, and daily life. Common names: Tingting, Alexis.", + "prompt": "Transcription en direct d'une conversation en chinois mandarin. Plusieurs interlocuteurs parlent, parfois en même temps. RÈGLES STRICTES: (1) Ne transcris QUE les paroles audibles en chinois. (2) Si l'audio est inaudible, du bruit, ou du silence, renvoie une chaîne vide. (3) NE GÉNÈRE JAMAIS ces phrases: 谢谢观看, 感谢收看, 订阅, 请订阅, 下期再见, Thank you, Subscribe, 字幕. (4) Ignore: musique, applaudissements, rires, bruits de fond, respirations.", "stream": false, "response_format": "text" }, diff --git a/src/core/Pipeline.cpp b/src/core/Pipeline.cpp index 7d621c2..08a02c6 100644 --- a/src/core/Pipeline.cpp +++ b/src/core/Pipeline.cpp @@ -70,6 +70,10 @@ bool Pipeline::start() { } running_ = true; + segment_id_ = 0; + + // Start session logging + session_logger_.startSession(); // Start background threads audio_thread_ = std::thread(&Pipeline::audioThread, this); @@ -126,6 +130,9 @@ void Pipeline::stop() { transcript_ss << "transcripts/transcript_" << timestamp.str() << ".txt"; ui_->exportTranscript(transcript_ss.str()); } + + // End session logging + session_logger_.endSession(); } void Pipeline::audioThread() { @@ -168,7 +175,19 @@ void Pipeline::processingThread() { auto& chunk = chunk_opt.value(); float duration = static_cast(chunk.data.size()) / (chunk.sample_rate * chunk.channels); - std::cout << "[Processing] Speech segment: " << duration << "s" << std::endl; + + // Calculate audio RMS for logging + float audio_rms = 0.0f; + if (!chunk.data.empty()) { + float sum_sq = 0.0f; + for (float s : chunk.data) sum_sq += s * s; + audio_rms = std::sqrt(sum_sq / chunk.data.size()); + } + + std::cout << "[Processing] Speech segment: " << duration << "s (RMS=" << audio_rms << ")" << std::endl; + + // Time Whisper + auto whisper_start = std::chrono::steady_clock::now(); // Transcribe with Whisper auto whisper_result = whisper_client_->transcribe( @@ -182,8 +201,13 @@ void Pipeline::processingThread() { config.getWhisperConfig().response_format ); + auto whisper_end = std::chrono::steady_clock::now(); + int64_t whisper_latency = std::chrono::duration_cast( + whisper_end - whisper_start).count(); + if (!whisper_result.has_value()) { std::cerr << "Whisper transcription failed" << std::endl; + session_logger_.logFilteredSegment("", "whisper_failed", duration, audio_rms); continue; } @@ -195,6 +219,7 @@ void Pipeline::processingThread() { size_t end = text.find_last_not_of(" \t\n\r"); if (start == std::string::npos) { std::cout << "[Skip] Empty transcription" << std::endl; + session_logger_.logFilteredSegment("", "empty", duration, audio_rms); continue; } text = text.substr(start, end - start + 1); @@ -267,6 +292,7 @@ void Pipeline::processingThread() { if (is_garbage) { std::cout << "[Skip] Filtered: " << text << std::endl; + session_logger_.logFilteredSegment(text, "hallucination", duration, audio_rms); continue; } @@ -275,6 +301,9 @@ void Pipeline::processingThread() { ui_->addAudioCost(duration); } + // Time Claude + auto claude_start = std::chrono::steady_clock::now(); + // Translate with Claude auto claude_result = claude_client_->translate( text, @@ -283,8 +312,13 @@ void Pipeline::processingThread() { config.getClaudeConfig().temperature ); + auto claude_end = std::chrono::steady_clock::now(); + int64_t claude_latency = std::chrono::duration_cast( + claude_end - claude_start).count(); + if (!claude_result.has_value()) { std::cerr << "Claude translation failed" << std::endl; + session_logger_.logFilteredSegment(text, "claude_failed", duration, audio_rms); continue; } @@ -308,8 +342,24 @@ void Pipeline::processingThread() { ui_->setAccumulatedText(accumulated_chinese_, accumulated_french_); ui_->addTranslation(text, claude_result->text); + // Log successful segment + segment_id_++; + SegmentLog seg; + seg.id = segment_id_; + seg.chinese = text; + seg.french = claude_result->text; + seg.audio_duration_sec = duration; + seg.audio_rms = audio_rms; + seg.whisper_latency_ms = whisper_latency; + seg.claude_latency_ms = claude_latency; + seg.was_filtered = false; + seg.filter_reason = ""; + seg.timestamp = ""; // Will be set by logger + session_logger_.logSegment(seg); + std::cout << "CN: " << text << std::endl; std::cout << "FR: " << claude_result->text << std::endl; + std::cout << "[Latency] Whisper: " << whisper_latency << "ms, Claude: " << claude_latency << "ms" << std::endl; std::cout << "---" << std::endl; } } diff --git a/src/core/Pipeline.h b/src/core/Pipeline.h index d32271a..42a62f7 100644 --- a/src/core/Pipeline.h +++ b/src/core/Pipeline.h @@ -6,6 +6,7 @@ #include #include #include "../utils/ThreadSafeQueue.h" +#include "../utils/SessionLogger.h" namespace secondvoice { @@ -60,6 +61,10 @@ private: // Simple accumulation std::string accumulated_chinese_; std::string accumulated_french_; + + // Session logging + SessionLogger session_logger_; + int segment_id_ = 0; }; } // namespace secondvoice diff --git a/src/utils/SessionLogger.cpp b/src/utils/SessionLogger.cpp new file mode 100644 index 0000000..ed371c0 --- /dev/null +++ b/src/utils/SessionLogger.cpp @@ -0,0 +1,196 @@ +#include "SessionLogger.h" +#include +#include +#include +#include +#include + +namespace secondvoice { + +using json = nlohmann::json; + +SessionLogger::SessionLogger() = default; + +SessionLogger::~SessionLogger() { + if (is_active_) { + endSession(); + } +} + +std::string SessionLogger::getCurrentTimestamp() const { + auto now = std::chrono::system_clock::now(); + auto time_t = std::chrono::system_clock::to_time_t(now); + auto ms = std::chrono::duration_cast( + now.time_since_epoch()) % 1000; + + std::stringstream ss; + ss << std::put_time(std::localtime(&time_t), "%Y-%m-%d_%H%M%S"); + return ss.str(); +} + +void SessionLogger::startSession() { + if (is_active_) { + endSession(); + } + + session_start_time_ = getCurrentTimestamp(); + session_path_ = "./sessions/" + session_start_time_; + + // Create directories + std::filesystem::create_directories(session_path_ + "/segments"); + + is_active_ = true; + segment_count_ = 0; + filtered_count_ = 0; + total_audio_sec_ = 0.0f; + total_whisper_ms_ = 0; + total_claude_ms_ = 0; + segments_.clear(); + + std::cout << "[Session] Started: " << session_path_ << std::endl; +} + +void SessionLogger::endSession() { + if (!is_active_) return; + + writeSessionJson(); + is_active_ = false; + + std::cout << "[Session] Ended: " << segment_count_ << " segments, " + << filtered_count_ << " filtered, " + << total_audio_sec_ << "s audio" << std::endl; +} + +void SessionLogger::logSegment(const SegmentLog& segment) { + if (!is_active_) return; + + // Update counters + segment_count_++; + total_audio_sec_ += segment.audio_duration_sec; + total_whisper_ms_ += segment.whisper_latency_ms; + total_claude_ms_ += segment.claude_latency_ms; + + // Store segment + segments_.push_back(segment); + + // Write individual segment JSON + std::stringstream filename; + filename << session_path_ << "/segments/" + << std::setfill('0') << std::setw(3) << segment.id << ".json"; + + json j; + j["id"] = segment.id; + j["chinese"] = segment.chinese; + j["french"] = segment.french; + j["audio_duration_sec"] = segment.audio_duration_sec; + j["audio_rms"] = segment.audio_rms; + j["whisper_latency_ms"] = segment.whisper_latency_ms; + j["claude_latency_ms"] = segment.claude_latency_ms; + j["was_filtered"] = segment.was_filtered; + j["filter_reason"] = segment.filter_reason; + j["timestamp"] = segment.timestamp; + + std::ofstream file(filename.str()); + if (file.is_open()) { + file << j.dump(2); + file.close(); + } + + std::cout << "[Session] Logged segment #" << segment.id + << " (" << segment.audio_duration_sec << "s)" << std::endl; +} + +void SessionLogger::logFilteredSegment(const std::string& chinese, const std::string& reason, + float audio_duration, float audio_rms) { + if (!is_active_) return; + + filtered_count_++; + total_audio_sec_ += audio_duration; + + // Log filtered segment with special marker + SegmentLog seg; + seg.id = segment_count_ + filtered_count_; + seg.chinese = chinese; + seg.french = "[FILTERED]"; + seg.audio_duration_sec = audio_duration; + seg.audio_rms = audio_rms; + seg.whisper_latency_ms = 0; + seg.claude_latency_ms = 0; + seg.was_filtered = true; + seg.filter_reason = reason; + seg.timestamp = getCurrentTimestamp(); + + segments_.push_back(seg); + + // Write filtered segment JSON + std::stringstream filename; + filename << session_path_ << "/segments/" + << std::setfill('0') << std::setw(3) << seg.id << "_filtered.json"; + + json j; + j["id"] = seg.id; + j["chinese"] = seg.chinese; + j["filter_reason"] = reason; + j["audio_duration_sec"] = audio_duration; + j["audio_rms"] = audio_rms; + j["timestamp"] = seg.timestamp; + + std::ofstream file(filename.str()); + if (file.is_open()) { + file << j.dump(2); + file.close(); + } +} + +void SessionLogger::writeSessionJson() { + json session; + session["start_time"] = session_start_time_; + session["end_time"] = getCurrentTimestamp(); + session["total_segments"] = segment_count_; + session["filtered_segments"] = filtered_count_; + session["total_audio_seconds"] = total_audio_sec_; + session["avg_whisper_latency_ms"] = segment_count_ > 0 ? + total_whisper_ms_ / segment_count_ : 0; + session["avg_claude_latency_ms"] = segment_count_ > 0 ? + total_claude_ms_ / segment_count_ : 0; + + // Summary of all segments + json segments_summary = json::array(); + for (const auto& seg : segments_) { + json s; + s["id"] = seg.id; + s["chinese"] = seg.chinese; + s["french"] = seg.french; + s["duration"] = seg.audio_duration_sec; + s["filtered"] = seg.was_filtered; + if (seg.was_filtered) { + s["filter_reason"] = seg.filter_reason; + } + segments_summary.push_back(s); + } + session["segments"] = segments_summary; + + std::string filepath = session_path_ + "/session.json"; + std::ofstream file(filepath); + if (file.is_open()) { + file << session.dump(2); + file.close(); + std::cout << "[Session] Wrote " << filepath << std::endl; + } + + // Also write plain text transcript + std::string transcript_path = session_path_ + "/transcript.txt"; + std::ofstream transcript(transcript_path); + if (transcript.is_open()) { + transcript << "=== SecondVoice Session " << session_start_time_ << " ===\n\n"; + for (const auto& seg : segments_) { + if (!seg.was_filtered) { + transcript << "CN: " << seg.chinese << "\n"; + transcript << "FR: " << seg.french << "\n\n"; + } + } + transcript.close(); + } +} + +} // namespace secondvoice diff --git a/src/utils/SessionLogger.h b/src/utils/SessionLogger.h new file mode 100644 index 0000000..c999967 --- /dev/null +++ b/src/utils/SessionLogger.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include +#include + +namespace secondvoice { + +struct SegmentLog { + int id; + std::string chinese; + std::string french; + float audio_duration_sec; + float audio_rms; + int64_t whisper_latency_ms; + int64_t claude_latency_ms; + bool was_filtered; + std::string filter_reason; + std::string timestamp; +}; + +class SessionLogger { +public: + SessionLogger(); + ~SessionLogger(); + + // Start a new session (creates directory) + void startSession(); + + // End session (writes session.json summary) + void endSession(); + + // Log a segment + void logSegment(const SegmentLog& segment); + + // Log a filtered/skipped segment + void logFilteredSegment(const std::string& chinese, const std::string& reason, + float audio_duration, float audio_rms); + + // Get current session path + std::string getSessionPath() const { return session_path_; } + + // Check if session is active + bool isActive() const { return is_active_; } + +private: + std::string getCurrentTimestamp() const; + void writeSessionJson(); + + bool is_active_ = false; + std::string session_path_; + std::string session_start_time_; + int segment_count_ = 0; + int filtered_count_ = 0; + float total_audio_sec_ = 0.0f; + int total_whisper_ms_ = 0; + int total_claude_ms_ = 0; + + std::vector segments_; +}; + +} // namespace secondvoice