GroveEngine/tests/integration/test_04_race_condition.cpp
StillHammer 360f39325b feat: Add Memory Leak Hunter test & fix critical ModuleLoader leaks
**Test Suite Completion - Scenario 5**

Add comprehensive memory leak detection test for hot-reload system with 200 reload cycles.

**New Test: test_05_memory_leak**
- 200 hot-reload cycles without recompilation
- Memory monitoring every 5 seconds (RSS, temp files, .so handles)
- Multi-threaded: Engine (60 FPS) + ReloadScheduler + MemoryMonitor
- Strict validation: <10 MB growth, <50 KB/reload, ≤2 temp files

**New Module: LeakTestModule**
- Controlled memory allocations (1 MB work buffer)
- Large state serialization (100 KB blob)
- Simulates real-world module behavior

**Critical Fix: ModuleLoader Memory Leaks** (src/ModuleLoader.cpp:34-39)
- Auto-unload previous library before loading new one
- Prevents library handle leaks (+200 .so mappings eliminated)
- Prevents temp file accumulation (778 files → 1-2 files)
- Memory leak reduced by 97%: 36.5 MB → 1.9 MB

**Test Results - Before Fix:**
- Memory growth: 36.5 MB 
- Per reload: 187.1 KB 
- Temp files: 778 
- Mapped .so: +200 

**Test Results - After Fix:**
- Memory growth: 1.9 MB 
- Per reload: 9.7 KB 
- Temp files: 1-2 
- Mapped .so: stable 
- 200/200 reloads successful (100%)

**Enhanced SystemUtils helpers:**
- countTempFiles(): Count temp module files
- getMappedLibraryCount(): Track .so handle leaks via /proc/self/maps

**Test Lifecycle Improvements:**
- test_04 & test_05: Destroy old module before reload to prevent use-after-free
- Proper state/config preservation across reload boundary

**Files Modified:**
- src/ModuleLoader.cpp: Auto-unload on load()
- tests/integration/test_05_memory_leak.cpp: NEW - 200 cycle leak detector
- tests/modules/LeakTestModule.cpp: NEW - Test module with allocations
- tests/helpers/SystemUtils.{h,cpp}: Memory monitoring functions
- tests/integration/test_04_race_condition.cpp: Fixed module lifecycle
- tests/CMakeLists.txt: Added test_05 and LeakTestModule

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 10:06:18 +08:00

388 lines
17 KiB
C++

#include "grove/ModuleLoader.h"
#include "grove/SequentialModuleSystem.h"
#include "grove/JsonDataNode.h"
#include "../helpers/TestMetrics.h"
#include "../helpers/TestAssertions.h"
#include "../helpers/TestReporter.h"
#include "../helpers/SystemUtils.h"
#include "../helpers/AutoCompiler.h"
#include <iostream>
#include <chrono>
#include <thread>
#include <filesystem>
#include <atomic>
#include <mutex>
using namespace grove;
using namespace TestHelpers;
int main() {
TestReporter reporter("Race Condition Hunter");
std::cout << "================================================================================\n";
std::cout << "TEST: Race Condition Hunter - Concurrent Compilation & Reload\n";
std::cout << "================================================================================\n\n";
// === CONFIGURATION ===
const int TOTAL_COMPILATIONS = 15; // Guaranteed completion within timeout
const int COMPILE_INTERVAL_MS = 1000; // 1 second between compilations
const int FILE_CHECK_INTERVAL_MS = 50; // Check file changes every 50ms
const float TARGET_FPS = 60.0f;
const float FRAME_TIME = 1.0f / TARGET_FPS;
std::string modulePath = "build/tests/libTestModule.so";
std::string sourcePath = "tests/modules/TestModule.cpp";
std::string buildDir = "build";
// === ATOMIC COUNTERS (Thread-safe) ===
std::atomic<int> reloadAttempts{0};
std::atomic<int> reloadSuccesses{0};
std::atomic<int> reloadFailures{0};
std::atomic<int> corruptedLoads{0};
std::atomic<int> crashes{0};
std::atomic<bool> engineRunning{true};
std::atomic<bool> watcherRunning{true};
// CRITICAL: Mutex to protect moduleSystem access between threads
std::mutex moduleSystemMutex;
// Reload timing
std::mutex reloadTimesMutex;
std::vector<float> reloadTimes;
// Metrics
TestMetrics metrics;
// === SETUP ===
std::cout << "Setup:\n";
std::cout << " Module path: " << modulePath << "\n";
std::cout << " Source path: " << sourcePath << "\n";
std::cout << " Compilations: " << TOTAL_COMPILATIONS << "\n";
std::cout << " Interval: " << COMPILE_INTERVAL_MS << "ms\n";
std::cout << " Expected time: ~" << (TOTAL_COMPILATIONS * COMPILE_INTERVAL_MS / 1000) << "s\n\n";
// Load module initially
ModuleLoader loader;
auto moduleSystem = std::make_unique<SequentialModuleSystem>();
try {
auto module = loader.load(modulePath, "TestModule", false);
nlohmann::json configJson = nlohmann::json::object();
auto config = std::make_unique<JsonDataNode>("config", configJson);
module->setConfiguration(*config, nullptr, nullptr);
moduleSystem->registerModule("TestModule", std::move(module));
std::cout << " ✓ Initial module loaded\n\n";
} catch (const std::exception& e) {
std::cerr << "❌ Failed to load initial module: " << e.what() << "\n";
return 1;
}
// === THREAD 1: AUTO-COMPILER ===
std::cout << "Starting AutoCompiler thread...\n";
AutoCompiler compiler("TestModule", buildDir, sourcePath);
compiler.start(TOTAL_COMPILATIONS, COMPILE_INTERVAL_MS);
// === THREAD 2: FILE WATCHER ===
std::cout << "Starting FileWatcher thread...\n";
std::thread watcherThread([&]() {
try {
auto lastWriteTime = std::filesystem::last_write_time(modulePath);
while (watcherRunning.load() && engineRunning.load()) {
try {
auto currentTime = std::filesystem::last_write_time(modulePath);
if (currentTime != lastWriteTime) {
reloadAttempts++;
// Measure reload time
auto reloadStart = std::chrono::high_resolution_clock::now();
try {
// CRITICAL: Lock moduleSystem during entire reload
std::lock_guard<std::mutex> lock(moduleSystemMutex);
// Extract module and save state
auto module = moduleSystem->extractModule();
auto state = module->getState();
// CRITICAL: Destroy old module BEFORE reloading
// The loader.load() will unload the old .so
module.reset();
// Reload
auto newModule = loader.load(modulePath, "TestModule", true);
// Check if module loaded correctly
if (!newModule) {
corruptedLoads++;
reloadFailures++;
// Can't recover - old module already destroyed
} else {
// VALIDATE MODULE INTEGRITY
bool isCorrupted = false;
try {
// Test 1: Can we get health status?
auto health = newModule->getHealthStatus();
std::string version = health->getString("version", "");
// Test 2: Is version valid?
if (version.empty() || version == "unknown") {
isCorrupted = true;
}
// Test 3: Can we set configuration?
nlohmann::json configJson;
configJson["test"] = "validation";
auto testConfig = std::make_unique<JsonDataNode>("config", configJson);
newModule->setConfiguration(*testConfig, nullptr, nullptr);
} catch (const std::exception& e) {
// Module crashes on basic operations = corrupted
isCorrupted = true;
}
if (isCorrupted) {
corruptedLoads++;
reloadFailures++;
// Can't recover - old module already destroyed
} else {
// Module is valid, restore state and register
newModule->setState(*state);
moduleSystem->registerModule("TestModule", std::move(newModule));
reloadSuccesses++;
// Record reload time
auto reloadEnd = std::chrono::high_resolution_clock::now();
float reloadTimeMs = std::chrono::duration<float, std::milli>(reloadEnd - reloadStart).count();
{
std::lock_guard<std::mutex> timeLock(reloadTimesMutex);
reloadTimes.push_back(reloadTimeMs);
}
}
}
} catch (const std::exception& e) {
reloadFailures++;
// Module might already be registered, continue
}
lastWriteTime = currentTime;
}
} catch (const std::filesystem::filesystem_error&) {
// File might be being written, ignore
}
std::this_thread::sleep_for(std::chrono::milliseconds(FILE_CHECK_INTERVAL_MS));
}
} catch (const std::exception& e) {
std::cerr << "[FileWatcher] Exception: " << e.what() << "\n";
}
});
// === THREAD 3: ENGINE LOOP ===
std::cout << "Starting Engine thread (60 FPS)...\n\n";
std::thread engineThread([&]() {
try {
auto lastMemoryCheck = std::chrono::steady_clock::now();
while (engineRunning.load()) {
auto frameStart = std::chrono::high_resolution_clock::now();
try {
// TRY to lock moduleSystem (non-blocking)
// If reload is happening, skip this frame
if (moduleSystemMutex.try_lock()) {
try {
moduleSystem->processModules(FRAME_TIME);
moduleSystemMutex.unlock();
} catch (const std::exception& e) {
moduleSystemMutex.unlock();
throw;
}
}
// else: reload in progress, skip frame
} catch (const std::exception& e) {
crashes++;
std::cerr << "[Engine] Crash detected: " << e.what() << "\n";
}
auto frameEnd = std::chrono::high_resolution_clock::now();
float frameTime = std::chrono::duration<float, std::milli>(frameEnd - frameStart).count();
metrics.recordFPS(1000.0f / std::max(frameTime, 0.1f));
// Check memory every second
auto now = std::chrono::steady_clock::now();
if (std::chrono::duration_cast<std::chrono::seconds>(now - lastMemoryCheck).count() >= 1) {
metrics.recordMemoryUsage(getCurrentMemoryUsage());
lastMemoryCheck = now;
}
// Sleep to maintain target FPS (if frame finished early)
auto targetFrameTime = std::chrono::milliseconds(static_cast<int>(FRAME_TIME * 1000));
auto elapsed = frameEnd - frameStart;
if (elapsed < targetFrameTime) {
std::this_thread::sleep_for(targetFrameTime - elapsed);
}
}
} catch (const std::exception& e) {
std::cerr << "[Engine] Thread exception: " << e.what() << "\n";
}
});
// === MONITORING LOOP ===
std::cout << "Test running...\n";
auto startTime = std::chrono::steady_clock::now();
int lastPrintedPercent = 0;
const int MAX_TEST_TIME_SECONDS = 90; // Maximum 1.5 minutes (allows all 20 compilations)
while (compiler.isRunning() || compiler.getCurrentIteration() < TOTAL_COMPILATIONS) {
std::this_thread::sleep_for(std::chrono::seconds(2));
int currentIteration = compiler.getCurrentIteration();
int percent = (currentIteration * 100) / TOTAL_COMPILATIONS;
// Check for timeout
auto now = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(now - startTime).count();
if (elapsed > MAX_TEST_TIME_SECONDS) {
std::cout << "\n⚠️ Test timeout after " << elapsed << "s - stopping...\n";
break;
}
// Print progress every 10%
if (percent >= lastPrintedPercent + 10 && percent <= 100) {
std::cout << "\nProgress: " << percent << "% (" << currentIteration << "/" << TOTAL_COMPILATIONS << " compilations)\n";
std::cout << " Elapsed: " << elapsed << "s\n";
std::cout << " Compilations: " << compiler.getSuccessCount() << " OK, " << compiler.getFailureCount() << " FAIL\n";
std::cout << " Reloads: " << reloadSuccesses.load() << " OK, " << reloadFailures.load() << " FAIL\n";
std::cout << " Corrupted: " << corruptedLoads.load() << "\n";
std::cout << " Crashes: " << crashes.load() << "\n";
lastPrintedPercent = percent;
}
}
// === CLEANUP ===
std::cout << "\n\nStopping threads...\n";
compiler.stop();
watcherRunning = false;
engineRunning = false;
if (watcherThread.joinable()) {
watcherThread.join();
}
if (engineThread.joinable()) {
engineThread.join();
}
std::cout << " ✓ All threads stopped\n\n";
// === CALCULATE STATISTICS ===
float compileSuccessRate = (compiler.getSuccessCount() * 100.0f) / std::max(1, TOTAL_COMPILATIONS);
float reloadSuccessRate = (reloadSuccesses.load() * 100.0f) / std::max(1, reloadAttempts.load());
float avgReloadTime = 0.0f;
{
std::lock_guard<std::mutex> lock(reloadTimesMutex);
if (!reloadTimes.empty()) {
float sum = 0.0f;
for (float t : reloadTimes) {
sum += t;
}
avgReloadTime = sum / reloadTimes.size();
}
}
auto endTime = std::chrono::steady_clock::now();
auto totalTimeSeconds = std::chrono::duration_cast<std::chrono::seconds>(endTime - startTime).count();
// === PRINT SUMMARY ===
std::cout << "================================================================================\n";
std::cout << "RACE CONDITION HUNTER SUMMARY\n";
std::cout << "================================================================================\n\n";
std::cout << "Duration: " << totalTimeSeconds << "s\n\n";
std::cout << "Compilations:\n";
std::cout << " Total: " << TOTAL_COMPILATIONS << "\n";
std::cout << " Successes: " << compiler.getSuccessCount() << " (" << std::fixed << std::setprecision(1) << compileSuccessRate << "%)\n";
std::cout << " Failures: " << compiler.getFailureCount() << "\n\n";
std::cout << "Reloads:\n";
std::cout << " Attempts: " << reloadAttempts.load() << "\n";
std::cout << " Successes: " << reloadSuccesses.load() << " (" << std::fixed << std::setprecision(1) << reloadSuccessRate << "%)\n";
std::cout << " Failures: " << reloadFailures.load() << "\n";
std::cout << " Corrupted: " << corruptedLoads.load() << "\n";
std::cout << " Avg time: " << std::fixed << std::setprecision(0) << avgReloadTime << "ms\n\n";
std::cout << "Stability:\n";
std::cout << " Crashes: " << crashes.load() << "\n";
std::cout << " Avg FPS: " << std::fixed << std::setprecision(1) << metrics.getFPSAvg() << "\n";
std::cout << " Memory: " << std::fixed << std::setprecision(2) << metrics.getMemoryGrowth() << " MB\n\n";
// === ASSERTIONS ===
bool passed = true;
std::cout << "Validating results...\n";
// MUST PASS criteria
if (compileSuccessRate < 95.0f) {
std::cout << " ❌ Compile success rate too low: " << compileSuccessRate << "% (need > 95%)\n";
passed = false;
} else {
std::cout << " ✓ Compile success rate: " << compileSuccessRate << "%\n";
}
if (corruptedLoads.load() > 0) {
std::cout << " ❌ Corrupted loads detected: " << corruptedLoads.load() << " (need 0)\n";
passed = false;
} else {
std::cout << " ✓ No corrupted loads\n";
}
if (crashes.load() > 0) {
std::cout << " ❌ Crashes detected: " << crashes.load() << " (need 0)\n";
passed = false;
} else {
std::cout << " ✓ No crashes\n";
}
if (reloadAttempts.load() > 0 && reloadSuccessRate < 99.0f) {
std::cout << " ❌ Reload success rate too low: " << reloadSuccessRate << "% (need > 99%)\n";
passed = false;
} else if (reloadAttempts.load() > 0) {
std::cout << " ✓ Reload success rate: " << reloadSuccessRate << "%\n";
}
// File stability validation: reload time should be >= 100ms
// This proves that ModuleLoader is waiting for file stability
if (reloadAttempts.load() > 0) {
if (avgReloadTime < 100.0f) {
std::cout << " ❌ Reload time too fast: " << avgReloadTime << "ms (need >= 100ms)\n";
std::cout << " File stability check is NOT working properly!\n";
passed = false;
} else if (avgReloadTime > 600.0f) {
std::cout << " ⚠️ Reload time very slow: " << avgReloadTime << "ms (> 600ms)\n";
std::cout << " File stability might be waiting too long\n";
} else {
std::cout << " ✓ Reload time: " << avgReloadTime << "ms (file stability working)\n";
}
}
std::cout << "\n";
// === FINAL RESULT ===
std::cout << "================================================================================\n";
if (passed) {
std::cout << "Result: ✅ PASSED\n";
} else {
std::cout << "Result: ❌ FAILED\n";
}
std::cout << "================================================================================\n";
return passed ? 0 : 1;
}