feat: Add session logging system with per-segment metrics
- Add SessionLogger class for structured debug logging
- Log each segment with: chinese, french, audio duration, RMS, latency
- Track filtered segments with reasons (hallucination, empty, failed)
- Create session directories with JSON files per segment
- Update Whisper prompt with anti-hallucination rules
- Integrate timing measurements for Whisper and Claude calls

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
9163e082da
commit
9baa213a82
1
.gitignore
vendored
1
.gitignore
vendored
@ -64,6 +64,7 @@ imgui.ini
|
||||
*.aac
|
||||
*.m4a
|
||||
denoised/
|
||||
sessions/
|
||||
|
||||
# Claude Code local settings
|
||||
.claude/settings.local.json
|
||||
|
||||
@ -108,6 +108,7 @@ set(SOURCES_UI
|
||||
src/ui/TranslationUI.cpp
|
||||
# Utils
|
||||
src/utils/Config.cpp
|
||||
src/utils/SessionLogger.cpp
|
||||
# Core
|
||||
src/core/Pipeline.cpp
|
||||
)
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
"model": "gpt-4o-mini-transcribe",
|
||||
"language": "zh",
|
||||
"temperature": 0.0,
|
||||
"prompt": "The following is a conversation in Mandarin Chinese about business, family, and daily life. Common names: Tingting, Alexis.",
|
||||
"prompt": "Transcription en direct d'une conversation en chinois mandarin. Plusieurs interlocuteurs parlent, parfois en même temps. RÈGLES STRICTES: (1) Ne transcris QUE les paroles audibles en chinois. (2) Si l'audio est inaudible, du bruit, ou du silence, renvoie une chaîne vide. (3) NE GÉNÈRE JAMAIS ces phrases: 谢谢观看, 感谢收看, 订阅, 请订阅, 下期再见, Thank you, Subscribe, 字幕. (4) Ignore: musique, applaudissements, rires, bruits de fond, respirations.",
|
||||
"stream": false,
|
||||
"response_format": "text"
|
||||
},
|
||||
|
||||
@ -70,6 +70,10 @@ bool Pipeline::start() {
|
||||
}
|
||||
|
||||
running_ = true;
|
||||
segment_id_ = 0;
|
||||
|
||||
// Start session logging
|
||||
session_logger_.startSession();
|
||||
|
||||
// Start background threads
|
||||
audio_thread_ = std::thread(&Pipeline::audioThread, this);
|
||||
@ -126,6 +130,9 @@ void Pipeline::stop() {
|
||||
transcript_ss << "transcripts/transcript_" << timestamp.str() << ".txt";
|
||||
ui_->exportTranscript(transcript_ss.str());
|
||||
}
|
||||
|
||||
// End session logging
|
||||
session_logger_.endSession();
|
||||
}
|
||||
|
||||
void Pipeline::audioThread() {
|
||||
@ -168,7 +175,19 @@ void Pipeline::processingThread() {
|
||||
|
||||
auto& chunk = chunk_opt.value();
|
||||
float duration = static_cast<float>(chunk.data.size()) / (chunk.sample_rate * chunk.channels);
|
||||
std::cout << "[Processing] Speech segment: " << duration << "s" << std::endl;
|
||||
|
||||
// Calculate audio RMS for logging
|
||||
float audio_rms = 0.0f;
|
||||
if (!chunk.data.empty()) {
|
||||
float sum_sq = 0.0f;
|
||||
for (float s : chunk.data) sum_sq += s * s;
|
||||
audio_rms = std::sqrt(sum_sq / chunk.data.size());
|
||||
}
|
||||
|
||||
std::cout << "[Processing] Speech segment: " << duration << "s (RMS=" << audio_rms << ")" << std::endl;
|
||||
|
||||
// Time Whisper
|
||||
auto whisper_start = std::chrono::steady_clock::now();
|
||||
|
||||
// Transcribe with Whisper
|
||||
auto whisper_result = whisper_client_->transcribe(
|
||||
@ -182,8 +201,13 @@ void Pipeline::processingThread() {
|
||||
config.getWhisperConfig().response_format
|
||||
);
|
||||
|
||||
auto whisper_end = std::chrono::steady_clock::now();
|
||||
int64_t whisper_latency = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
whisper_end - whisper_start).count();
|
||||
|
||||
if (!whisper_result.has_value()) {
|
||||
std::cerr << "Whisper transcription failed" << std::endl;
|
||||
session_logger_.logFilteredSegment("", "whisper_failed", duration, audio_rms);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -195,6 +219,7 @@ void Pipeline::processingThread() {
|
||||
size_t end = text.find_last_not_of(" \t\n\r");
|
||||
if (start == std::string::npos) {
|
||||
std::cout << "[Skip] Empty transcription" << std::endl;
|
||||
session_logger_.logFilteredSegment("", "empty", duration, audio_rms);
|
||||
continue;
|
||||
}
|
||||
text = text.substr(start, end - start + 1);
|
||||
@ -267,6 +292,7 @@ void Pipeline::processingThread() {
|
||||
|
||||
if (is_garbage) {
|
||||
std::cout << "[Skip] Filtered: " << text << std::endl;
|
||||
session_logger_.logFilteredSegment(text, "hallucination", duration, audio_rms);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -275,6 +301,9 @@ void Pipeline::processingThread() {
|
||||
ui_->addAudioCost(duration);
|
||||
}
|
||||
|
||||
// Time Claude
|
||||
auto claude_start = std::chrono::steady_clock::now();
|
||||
|
||||
// Translate with Claude
|
||||
auto claude_result = claude_client_->translate(
|
||||
text,
|
||||
@ -283,8 +312,13 @@ void Pipeline::processingThread() {
|
||||
config.getClaudeConfig().temperature
|
||||
);
|
||||
|
||||
auto claude_end = std::chrono::steady_clock::now();
|
||||
int64_t claude_latency = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
claude_end - claude_start).count();
|
||||
|
||||
if (!claude_result.has_value()) {
|
||||
std::cerr << "Claude translation failed" << std::endl;
|
||||
session_logger_.logFilteredSegment(text, "claude_failed", duration, audio_rms);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -308,8 +342,24 @@ void Pipeline::processingThread() {
|
||||
ui_->setAccumulatedText(accumulated_chinese_, accumulated_french_);
|
||||
ui_->addTranslation(text, claude_result->text);
|
||||
|
||||
// Log successful segment
|
||||
segment_id_++;
|
||||
SegmentLog seg;
|
||||
seg.id = segment_id_;
|
||||
seg.chinese = text;
|
||||
seg.french = claude_result->text;
|
||||
seg.audio_duration_sec = duration;
|
||||
seg.audio_rms = audio_rms;
|
||||
seg.whisper_latency_ms = whisper_latency;
|
||||
seg.claude_latency_ms = claude_latency;
|
||||
seg.was_filtered = false;
|
||||
seg.filter_reason = "";
|
||||
seg.timestamp = ""; // Will be set by logger
|
||||
session_logger_.logSegment(seg);
|
||||
|
||||
std::cout << "CN: " << text << std::endl;
|
||||
std::cout << "FR: " << claude_result->text << std::endl;
|
||||
std::cout << "[Latency] Whisper: " << whisper_latency << "ms, Claude: " << claude_latency << "ms" << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "../utils/ThreadSafeQueue.h"
|
||||
#include "../utils/SessionLogger.h"
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
@ -60,6 +61,10 @@ private:
|
||||
// Simple accumulation
|
||||
std::string accumulated_chinese_;
|
||||
std::string accumulated_french_;
|
||||
|
||||
// Session logging
|
||||
SessionLogger session_logger_;
|
||||
int segment_id_ = 0;
|
||||
};
|
||||
|
||||
} // namespace secondvoice
|
||||
|
||||
196
src/utils/SessionLogger.cpp
Normal file
196
src/utils/SessionLogger.cpp
Normal file
@ -0,0 +1,196 @@
|
||||
#include "SessionLogger.h"

#include <filesystem>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <system_error>

#include <nlohmann/json.hpp>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
// Defaulted constructor: no session is started here; call startSession() to begin logging.
SessionLogger::SessionLogger() = default;
|
||||
|
||||
// Closes out a session that is still running when the logger is destroyed,
// so that the session summary is written even without an explicit endSession().
SessionLogger::~SessionLogger() {
    if (!is_active_) {
        return;
    }
    endSession();
}
|
||||
|
||||
// Returns the current local wall-clock time formatted as "YYYY-MM-DD_HHMMSS".
//
// The result has one-second resolution. It is used both as the session
// directory name and as the per-segment timestamp string.
//
// NOTE: std::localtime may return a pointer to shared static storage, so this
// is not guaranteed to be safe if called concurrently from several threads.
std::string SessionLogger::getCurrentTimestamp() const {
    const auto now = std::chrono::system_clock::now();
    const auto now_c = std::chrono::system_clock::to_time_t(now);

    std::stringstream ss;
    ss << std::put_time(std::localtime(&now_c), "%Y-%m-%d_%H%M%S");
    return ss.str();
}
|
||||
|
||||
void SessionLogger::startSession() {
|
||||
if (is_active_) {
|
||||
endSession();
|
||||
}
|
||||
|
||||
session_start_time_ = getCurrentTimestamp();
|
||||
session_path_ = "./sessions/" + session_start_time_;
|
||||
|
||||
// Create directories
|
||||
std::filesystem::create_directories(session_path_ + "/segments");
|
||||
|
||||
is_active_ = true;
|
||||
segment_count_ = 0;
|
||||
filtered_count_ = 0;
|
||||
total_audio_sec_ = 0.0f;
|
||||
total_whisper_ms_ = 0;
|
||||
total_claude_ms_ = 0;
|
||||
segments_.clear();
|
||||
|
||||
std::cout << "[Session] Started: " << session_path_ << std::endl;
|
||||
}
|
||||
|
||||
// Finishes the active session: writes the session summary files, marks the
// logger inactive and prints the session totals. Does nothing when no
// session is active.
void SessionLogger::endSession() {
    if (is_active_) {
        writeSessionJson();
        is_active_ = false;

        std::cout << "[Session] Ended: "
                  << segment_count_ << " segments, "
                  << filtered_count_ << " filtered, "
                  << total_audio_sec_ << "s audio"
                  << std::endl;
    }
}
|
||||
|
||||
void SessionLogger::logSegment(const SegmentLog& segment) {
|
||||
if (!is_active_) return;
|
||||
|
||||
// Update counters
|
||||
segment_count_++;
|
||||
total_audio_sec_ += segment.audio_duration_sec;
|
||||
total_whisper_ms_ += segment.whisper_latency_ms;
|
||||
total_claude_ms_ += segment.claude_latency_ms;
|
||||
|
||||
// Store segment
|
||||
segments_.push_back(segment);
|
||||
|
||||
// Write individual segment JSON
|
||||
std::stringstream filename;
|
||||
filename << session_path_ << "/segments/"
|
||||
<< std::setfill('0') << std::setw(3) << segment.id << ".json";
|
||||
|
||||
json j;
|
||||
j["id"] = segment.id;
|
||||
j["chinese"] = segment.chinese;
|
||||
j["french"] = segment.french;
|
||||
j["audio_duration_sec"] = segment.audio_duration_sec;
|
||||
j["audio_rms"] = segment.audio_rms;
|
||||
j["whisper_latency_ms"] = segment.whisper_latency_ms;
|
||||
j["claude_latency_ms"] = segment.claude_latency_ms;
|
||||
j["was_filtered"] = segment.was_filtered;
|
||||
j["filter_reason"] = segment.filter_reason;
|
||||
j["timestamp"] = segment.timestamp;
|
||||
|
||||
std::ofstream file(filename.str());
|
||||
if (file.is_open()) {
|
||||
file << j.dump(2);
|
||||
file.close();
|
||||
}
|
||||
|
||||
std::cout << "[Session] Logged segment #" << segment.id
|
||||
<< " (" << segment.audio_duration_sec << "s)" << std::endl;
|
||||
}
|
||||
|
||||
void SessionLogger::logFilteredSegment(const std::string& chinese, const std::string& reason,
|
||||
float audio_duration, float audio_rms) {
|
||||
if (!is_active_) return;
|
||||
|
||||
filtered_count_++;
|
||||
total_audio_sec_ += audio_duration;
|
||||
|
||||
// Log filtered segment with special marker
|
||||
SegmentLog seg;
|
||||
seg.id = segment_count_ + filtered_count_;
|
||||
seg.chinese = chinese;
|
||||
seg.french = "[FILTERED]";
|
||||
seg.audio_duration_sec = audio_duration;
|
||||
seg.audio_rms = audio_rms;
|
||||
seg.whisper_latency_ms = 0;
|
||||
seg.claude_latency_ms = 0;
|
||||
seg.was_filtered = true;
|
||||
seg.filter_reason = reason;
|
||||
seg.timestamp = getCurrentTimestamp();
|
||||
|
||||
segments_.push_back(seg);
|
||||
|
||||
// Write filtered segment JSON
|
||||
std::stringstream filename;
|
||||
filename << session_path_ << "/segments/"
|
||||
<< std::setfill('0') << std::setw(3) << seg.id << "_filtered.json";
|
||||
|
||||
json j;
|
||||
j["id"] = seg.id;
|
||||
j["chinese"] = seg.chinese;
|
||||
j["filter_reason"] = reason;
|
||||
j["audio_duration_sec"] = audio_duration;
|
||||
j["audio_rms"] = audio_rms;
|
||||
j["timestamp"] = seg.timestamp;
|
||||
|
||||
std::ofstream file(filename.str());
|
||||
if (file.is_open()) {
|
||||
file << j.dump(2);
|
||||
file.close();
|
||||
}
|
||||
}
|
||||
|
||||
void SessionLogger::writeSessionJson() {
|
||||
json session;
|
||||
session["start_time"] = session_start_time_;
|
||||
session["end_time"] = getCurrentTimestamp();
|
||||
session["total_segments"] = segment_count_;
|
||||
session["filtered_segments"] = filtered_count_;
|
||||
session["total_audio_seconds"] = total_audio_sec_;
|
||||
session["avg_whisper_latency_ms"] = segment_count_ > 0 ?
|
||||
total_whisper_ms_ / segment_count_ : 0;
|
||||
session["avg_claude_latency_ms"] = segment_count_ > 0 ?
|
||||
total_claude_ms_ / segment_count_ : 0;
|
||||
|
||||
// Summary of all segments
|
||||
json segments_summary = json::array();
|
||||
for (const auto& seg : segments_) {
|
||||
json s;
|
||||
s["id"] = seg.id;
|
||||
s["chinese"] = seg.chinese;
|
||||
s["french"] = seg.french;
|
||||
s["duration"] = seg.audio_duration_sec;
|
||||
s["filtered"] = seg.was_filtered;
|
||||
if (seg.was_filtered) {
|
||||
s["filter_reason"] = seg.filter_reason;
|
||||
}
|
||||
segments_summary.push_back(s);
|
||||
}
|
||||
session["segments"] = segments_summary;
|
||||
|
||||
std::string filepath = session_path_ + "/session.json";
|
||||
std::ofstream file(filepath);
|
||||
if (file.is_open()) {
|
||||
file << session.dump(2);
|
||||
file.close();
|
||||
std::cout << "[Session] Wrote " << filepath << std::endl;
|
||||
}
|
||||
|
||||
// Also write plain text transcript
|
||||
std::string transcript_path = session_path_ + "/transcript.txt";
|
||||
std::ofstream transcript(transcript_path);
|
||||
if (transcript.is_open()) {
|
||||
transcript << "=== SecondVoice Session " << session_start_time_ << " ===\n\n";
|
||||
for (const auto& seg : segments_) {
|
||||
if (!seg.was_filtered) {
|
||||
transcript << "CN: " << seg.chinese << "\n";
|
||||
transcript << "FR: " << seg.french << "\n\n";
|
||||
}
|
||||
}
|
||||
transcript.close();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace secondvoice
|
||||
63
src/utils/SessionLogger.h
Normal file
63
src/utils/SessionLogger.h
Normal file
@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
|
||||
namespace secondvoice {
|
||||
|
||||
// One logged audio segment, either translated successfully or filtered out.
//
// Every member has a default so that a default-constructed SegmentLog never
// carries indeterminate values into the JSON output.
struct SegmentLog {
    int id = 0;                        // segment number used in file names and JSON
    std::string chinese;               // transcribed source text
    std::string french;                // translated text
    float audio_duration_sec = 0.0f;   // length of the audio segment, in seconds
    float audio_rms = 0.0f;            // RMS level of the segment's samples
    int64_t whisper_latency_ms = 0;    // transcription call duration, in milliseconds
    int64_t claude_latency_ms = 0;     // translation call duration, in milliseconds
    bool was_filtered = false;         // true when the segment was skipped
    std::string filter_reason;         // why it was skipped; empty otherwise
    std::string timestamp;             // time the segment was logged
};
|
||||
|
||||
class SessionLogger {
|
||||
public:
|
||||
SessionLogger();
|
||||
~SessionLogger();
|
||||
|
||||
// Start a new session (creates directory)
|
||||
void startSession();
|
||||
|
||||
// End session (writes session.json summary)
|
||||
void endSession();
|
||||
|
||||
// Log a segment
|
||||
void logSegment(const SegmentLog& segment);
|
||||
|
||||
// Log a filtered/skipped segment
|
||||
void logFilteredSegment(const std::string& chinese, const std::string& reason,
|
||||
float audio_duration, float audio_rms);
|
||||
|
||||
// Get current session path
|
||||
std::string getSessionPath() const { return session_path_; }
|
||||
|
||||
// Check if session is active
|
||||
bool isActive() const { return is_active_; }
|
||||
|
||||
private:
|
||||
std::string getCurrentTimestamp() const;
|
||||
void writeSessionJson();
|
||||
|
||||
bool is_active_ = false;
|
||||
std::string session_path_;
|
||||
std::string session_start_time_;
|
||||
int segment_count_ = 0;
|
||||
int filtered_count_ = 0;
|
||||
float total_audio_sec_ = 0.0f;
|
||||
int total_whisper_ms_ = 0;
|
||||
int total_claude_ms_ = 0;
|
||||
|
||||
std::vector<SegmentLog> segments_;
|
||||
};
|
||||
|
||||
} // namespace secondvoice
|
||||
Loading…
Reference in New Issue
Block a user