GroveEngine/tests/benchmarks/benchmark_threaded_vs_sequential.cpp
StillHammer aefd7921b2 fix: Critical race conditions in ThreadedModuleSystem and logger
Fixed two critical race conditions that prevented multi-threaded module execution:

## Bug #1: ThreadedModuleSystem::registerModule() race condition

**Symptom:** Deadlock on first processModules() call
**Root Cause:** Worker thread started before being added to workers vector
**Fix:** Add worker to vector BEFORE spawning thread (src/ThreadedModuleSystem.cpp:102-108)

Before:
- Create worker → Start thread → Add to vector (RACE!)
- Thread accesses workers[index] before push_back completes

After:
- Create worker → Add to vector → Start thread (SAFE)
- Thread guaranteed to find worker in vector

## Bug #2: stillhammer::createLogger() race condition

**Symptom:** Deadlock when multiple threads create loggers simultaneously
**Root Cause:** Check-then-register pattern without mutex protection
**Fix:** Added static mutex around spdlog::get() + register_logger() (external/StillHammer/logger/src/Logger.cpp:94-96)

Before:
- Thread 1: check → create → register
- Thread 2: check → create → register (RACE on spdlog registry!)

After:
- Mutex protects entire check-then-register critical section

## Validation & Testing

Added comprehensive test suite:
- test_threaded_module_system.cpp (6 unit tests)
- test_threaded_stress.cpp (5 stress tests: 50 modules × 1000 frames)
- test_logger_threadsafe.cpp (concurrent logger creation)
- benchmark_threaded_vs_sequential.cpp (performance comparison)
- docs/THREADED_MODULE_SYSTEM_VALIDATION.md (full validation report)

All tests passing (100%):
- ThreadedModuleSystem:  0.15s
- ThreadedStress:  7.64s
- LoggerThreadSafe:  0.13s

## Impact

ThreadedModuleSystem now PRODUCTION READY:
- Thread-safe module registration
- Stable parallel execution (validated with 50,000+ operations)
- Hot-reload working (100 cycles tested)
- Logger thread-safe for concurrent module initialization

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-19 07:37:31 +07:00

303 lines
12 KiB
C++
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "grove/ThreadedModuleSystem.h"
#include "grove/SequentialModuleSystem.h"
#include "grove/JsonDataNode.h"
#include "../helpers/TestAssertions.h"
#include <spdlog/spdlog.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <iostream>
#include <chrono>
#include <thread>
#include <iomanip>
#include <vector>
using namespace grove;
// ============================================================================
// Benchmark Module: Simulates realistic game module workload
// ============================================================================
class BenchmarkModule : public IModule {
private:
int counter = 0;
std::string name;
IIO* io = nullptr;
std::shared_ptr<spdlog::logger> logger;
int workDelayMs = 0;
public:
BenchmarkModule(std::string moduleName, int workMs = 5)
: name(std::move(moduleName)), workDelayMs(workMs) {
logger = spdlog::get("BenchmarkModule_" + name);
if (!logger) {
logger = spdlog::stdout_color_mt("BenchmarkModule_" + name);
logger->set_level(spdlog::level::off); // Disable logging for benchmarks
}
}
void process(const IDataNode& input) override {
counter++;
// Simulate realistic work (e.g., physics, AI, rendering preparation)
if (workDelayMs > 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(workDelayMs));
}
}
void setConfiguration(const IDataNode& configNode, IIO* ioLayer, ITaskScheduler* scheduler) override {
io = ioLayer;
}
const IDataNode& getConfiguration() override {
static JsonDataNode emptyConfig("config", nlohmann::json{});
return emptyConfig;
}
std::unique_ptr<IDataNode> getHealthStatus() override {
return std::make_unique<JsonDataNode>("health", nlohmann::json{{"status", "healthy"}});
}
void shutdown() override {}
std::unique_ptr<IDataNode> getState() override {
return std::make_unique<JsonDataNode>("state", nlohmann::json{{"counter", counter}});
}
void setState(const IDataNode& state) override {
counter = state.getInt("counter", 0);
}
std::string getType() const override {
return "BenchmarkModule";
}
bool isIdle() const override {
return true;
}
int getCounter() const { return counter; }
};
// ============================================================================
// Benchmark Runner
// ============================================================================
struct BenchmarkResult {
int numModules;
int workMs;
int numFrames;
float sequentialTime;
float threadedTime;
float speedup;
float sequentialAvgFrame;
float threadedAvgFrame;
};
BenchmarkResult runBenchmark(int numModules, int workMs, int numFrames) {
BenchmarkResult result;
result.numModules = numModules;
result.workMs = workMs;
result.numFrames = numFrames;
// --- Sequential System ---
{
auto system = std::make_unique<SequentialModuleSystem>();
for (int i = 0; i < numModules; i++) {
auto module = std::make_unique<BenchmarkModule>("SeqModule_" + std::to_string(i), workMs);
system->registerModule("SeqModule_" + std::to_string(i), std::move(module));
}
auto start = std::chrono::high_resolution_clock::now();
for (int frame = 0; frame < numFrames; frame++) {
system->processModules(1.0f / 60.0f);
}
auto end = std::chrono::high_resolution_clock::now();
result.sequentialTime = std::chrono::duration<float, std::milli>(end - start).count();
result.sequentialAvgFrame = result.sequentialTime / numFrames;
}
// --- Threaded System ---
{
auto system = std::make_unique<ThreadedModuleSystem>();
for (int i = 0; i < numModules; i++) {
auto module = std::make_unique<BenchmarkModule>("ThreadedModule_" + std::to_string(i), workMs);
system->registerModule("ThreadedModule_" + std::to_string(i), std::move(module));
}
auto start = std::chrono::high_resolution_clock::now();
for (int frame = 0; frame < numFrames; frame++) {
system->processModules(1.0f / 60.0f);
}
auto end = std::chrono::high_resolution_clock::now();
result.threadedTime = std::chrono::duration<float, std::milli>(end - start).count();
result.threadedAvgFrame = result.threadedTime / numFrames;
}
// Calculate speedup
result.speedup = result.sequentialTime / result.threadedTime;
return result;
}
// ============================================================================
// Print Results
// ============================================================================
void printResultsTable(const std::vector<BenchmarkResult>& results) {
std::cout << "\n";
std::cout << "┌─────────────────────────────────────────────────────────────────────────────────────────┐\n";
std::cout << "│ Sequential vs Threaded Performance Comparison │\n";
std::cout << "├────────┬────────┬─────────┬──────────────┬──────────────┬──────────┬──────────┬─────────┤\n";
std::cout << "│ Modules│ Work │ Frames │ Sequential │ Threaded │ Seq/Frame│ Thr/Frame│ Speedup │\n";
std::cout << "│ │ (ms) │ │ Total (ms) │ Total (ms) │ (ms) │ (ms) │ │\n";
std::cout << "├────────┼────────┼─────────┼──────────────┼──────────────┼──────────┼──────────┼─────────┤\n";
for (const auto& r : results) {
std::cout << "" << std::setw(6) << r.numModules << "";
std::cout << std::setw(6) << r.workMs << "";
std::cout << std::setw(7) << r.numFrames << "";
std::cout << std::setw(12) << std::fixed << std::setprecision(2) << r.sequentialTime << "";
std::cout << std::setw(12) << std::fixed << std::setprecision(2) << r.threadedTime << "";
std::cout << std::setw(8) << std::fixed << std::setprecision(3) << r.sequentialAvgFrame << "";
std::cout << std::setw(8) << std::fixed << std::setprecision(3) << r.threadedAvgFrame << "";
std::cout << std::setw(7) << std::fixed << std::setprecision(2) << r.speedup << "x │\n";
}
std::cout << "└────────┴────────┴─────────┴──────────────┴──────────────┴──────────┴──────────┴─────────┘\n";
}
void printCSV(const std::vector<BenchmarkResult>& results) {
std::cout << "\n=== CSV Output ===\n";
std::cout << "Modules,WorkMs,Frames,SequentialTotal,ThreadedTotal,SequentialAvg,ThreadedAvg,Speedup\n";
for (const auto& r : results) {
std::cout << r.numModules << ","
<< r.workMs << ","
<< r.numFrames << ","
<< std::fixed << std::setprecision(2) << r.sequentialTime << ","
<< r.threadedTime << ","
<< std::setprecision(3) << r.sequentialAvgFrame << ","
<< r.threadedAvgFrame << ","
<< std::setprecision(2) << r.speedup << "\n";
}
}
void printAnalysis(const std::vector<BenchmarkResult>& results) {
std::cout << "\n=== Performance Analysis ===\n\n";
// Find best speedup
auto bestSpeedup = *std::max_element(results.begin(), results.end(),
[](const BenchmarkResult& a, const BenchmarkResult& b) {
return a.speedup < b.speedup;
});
std::cout << "Best Speedup: " << std::fixed << std::setprecision(2)
<< bestSpeedup.speedup << "x with "
<< bestSpeedup.numModules << " modules ("
<< bestSpeedup.workMs << "ms work)\n";
// Overhead analysis for 1 module
auto singleModule = std::find_if(results.begin(), results.end(),
[](const BenchmarkResult& r) { return r.numModules == 1; });
if (singleModule != results.end()) {
float overhead = singleModule->threadedTime - singleModule->sequentialTime;
float overheadPercent = (overhead / singleModule->sequentialTime) * 100.0f;
std::cout << "\nSingle Module Overhead:\n";
std::cout << " Sequential: " << std::fixed << std::setprecision(2)
<< singleModule->sequentialTime << "ms\n";
std::cout << " Threaded: " << singleModule->threadedTime << "ms\n";
std::cout << " Overhead: " << overhead << "ms ("
<< std::setprecision(1) << overheadPercent << "%)\n";
}
// Parallel efficiency analysis
std::cout << "\nParallel Efficiency:\n";
for (const auto& r : results) {
if (r.numModules > 1) {
float efficiency = (r.speedup / r.numModules) * 100.0f;
std::cout << " " << r.numModules << " modules: "
<< std::fixed << std::setprecision(1) << efficiency << "% efficient\n";
}
}
// Recommendations
std::cout << "\nRecommendations:\n";
if (bestSpeedup.speedup >= 2.0f) {
std::cout << " ✓ ThreadedModuleSystem shows excellent parallel performance\n";
} else if (bestSpeedup.speedup >= 1.5f) {
std::cout << " ✓ ThreadedModuleSystem shows good parallel performance\n";
} else {
std::cout << " ⚠️ Parallel overhead may be high - investigate synchronization costs\n";
}
if (singleModule != results.end() && singleModule->speedup < 0.8f) {
std::cout << " ⚠️ Significant overhead for single module - use SequentialModuleSystem for 1 module\n";
}
std::cout << " ThreadedModuleSystem is best for 2-8 modules with moderate workloads\n";
}
// ============================================================================
// Main Benchmark Runner
// ============================================================================
int main() {
std::cout << "================================================================================\n";
std::cout << "ThreadedModuleSystem vs SequentialModuleSystem - Performance Benchmark\n";
std::cout << "================================================================================\n";
// Disable verbose logging for benchmarks
spdlog::set_level(spdlog::level::warn);
std::vector<BenchmarkResult> results;
// Benchmark configurations: (modules, work_ms, frames)
std::vector<std::tuple<int, int, int>> configs = {
{1, 5, 50}, // 1 module, 5ms work, 50 frames
{2, 5, 50}, // 2 modules, 5ms work, 50 frames
{4, 5, 50}, // 4 modules, 5ms work, 50 frames
{8, 5, 50}, // 8 modules, 5ms work, 50 frames
{4, 10, 20}, // 4 modules, 10ms work, 20 frames (heavier load)
{8, 10, 20}, // 8 modules, 10ms work, 20 frames
};
std::cout << "\nRunning benchmarks...\n";
int totalBenchmarks = configs.size();
int currentBenchmark = 0;
for (const auto& config : configs) {
int modules = std::get<0>(config);
int workMs = std::get<1>(config);
int frames = std::get<2>(config);
currentBenchmark++;
std::cout << " [" << currentBenchmark << "/" << totalBenchmarks << "] "
<< modules << " modules, " << workMs << "ms work, "
<< frames << " frames... ";
std::cout.flush();
auto result = runBenchmark(modules, workMs, frames);
results.push_back(result);
std::cout << "Speedup: " << std::fixed << std::setprecision(2)
<< result.speedup << "x\n";
}
// Print results
printResultsTable(results);
printAnalysis(results);
printCSV(results);
std::cout << "\n================================================================================\n";
std::cout << "Benchmark Complete!\n";
std::cout << "================================================================================\n";
return 0;
}