diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 55ef07c..1747545 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -206,3 +206,43 @@ add_dependencies(test_05_memory_leak LeakTestModule)
 
 # CTest integration
 add_test(NAME MemoryLeakHunter COMMAND test_05_memory_leak)
+
+# Memory leak profiler (detailed analysis)
+add_executable(profile_memory_leak
+    profile_memory_leak.cpp
+)
+
+target_link_libraries(profile_memory_leak PRIVATE
+    test_helpers
+    GroveEngine::core
+    GroveEngine::impl
+)
+
+add_dependencies(profile_memory_leak LeakTestModule)
+
+# ErrorRecoveryModule for the automatic recovery test
+add_library(ErrorRecoveryModule SHARED
+    modules/ErrorRecoveryModule.cpp
+)
+
+target_link_libraries(ErrorRecoveryModule PRIVATE
+    GroveEngine::core
+    GroveEngine::impl
+    spdlog::spdlog
+)
+
+# Test 06: Error Recovery - Crash detection & auto-recovery
+add_executable(test_06_error_recovery
+    integration/test_06_error_recovery.cpp
+)
+
+target_link_libraries(test_06_error_recovery PRIVATE
+    test_helpers
+    GroveEngine::core
+    GroveEngine::impl
+)
+
+add_dependencies(test_06_error_recovery ErrorRecoveryModule)
+
+# CTest integration
+add_test(NAME ErrorRecovery COMMAND test_06_error_recovery)
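+#
+# Note: once configured, this test can also be run on its own through CTest,
+# for example: ctest -R ErrorRecovery --output-on-failure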
diff --git a/tests/integration/test_06_error_recovery.cpp b/tests/integration/test_06_error_recovery.cpp
new file mode 100644
index 0000000..b1748ea
--- /dev/null
+++ b/tests/integration/test_06_error_recovery.cpp
@@ -0,0 +1,272 @@
+#include "grove/ModuleLoader.h"
+#include "grove/SequentialModuleSystem.h"
+#include "grove/JsonDataNode.h"
+#include "../helpers/TestMetrics.h"
+#include "../helpers/TestAssertions.h"
+#include "../helpers/TestReporter.h"
+#include "../helpers/SystemUtils.h"
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <string>
+
+using namespace grove;
+
+/**
+ * Test 06: Error Recovery
+ *
+ * Goal: validate that the system can automatically detect a module crash and
+ * recover from it via hot-reload.
+ *
+ * Scenario:
+ * 1. Load ErrorRecoveryModule with a crash scheduled at frame 60
+ * 2. Run execution until the crash
+ * 3. Detect the crash (exception)
+ * 4. Trigger an automatic hot-reload
+ * 5. Verify that the module recovers (auto-recovery)
+ * 6. Continue normal execution
+ *
+ * Metrics:
+ * - Crash detection time
+ * - Recovery success rate
+ * - State preservation after recovery
+ * - Engine stability
+ */
+
+int main() {
+    TestReporter reporter("Error Recovery");
+    TestMetrics metrics;
+
+    std::cout << "================================================================================\n";
+    std::cout << "TEST: Error Recovery - Crash Detection & Auto-Recovery\n";
+    std::cout << "================================================================================\n\n";
+
+    // === SETUP ===
+    std::cout << "Setup: Loading ErrorRecoveryModule with crash trigger...\n";
+
+    ModuleLoader loader;
+    auto moduleSystem = std::make_unique<SequentialModuleSystem>();
+
+    // Load the module
+    std::string modulePath = "build/tests/libErrorRecoveryModule.so";
+    auto module = loader.load(modulePath, "ErrorRecoveryModule", false);
+
+    // Config: crash at frame 60, type runtime_error
+    nlohmann::json configJson;
+    configJson["crashAtFrame"] = 60;
+    configJson["crashType"] = 0;  // runtime_error
+    configJson["enableAutoRecovery"] = true;
+    configJson["versionTag"] = "v1.0";
+    auto config = std::make_unique<JsonDataNode>("config", configJson);
+
+    module->setConfiguration(*config, nullptr, nullptr);
+    moduleSystem->registerModule("ErrorRecoveryModule", std::move(module));
+
+    std::cout << "  ✓ Module loaded with crash trigger at frame 60\n\n";
+
+    // === PHASE 1: Run until crash ===
+    std::cout << "Phase 1: Running until crash (target frame: 60)...\n";
+
+    bool crashDetected = false;
+    int crashFrame = -1;
+    auto crashDetectionStart = std::chrono::high_resolution_clock::now();
+
+    for (int frame = 1; frame <= 100; frame++) {
+        try {
+            auto frameStart = std::chrono::high_resolution_clock::now();
+
+            moduleSystem->processModules(1.0f / 60.0f);
+
+            auto frameEnd = std::chrono::high_resolution_clock::now();
+            float frameTime = std::chrono::duration<float, std::milli>(frameEnd - frameStart).count();
+            metrics.recordFPS(1000.0f / frameTime);
+
+            if (frame % 20 == 0) {
+                std::cout << "  Frame " << frame << "/100 - OK\n";
+            }
+
+        } catch (const std::exception& e) {
+            // CRASH DETECTED!
+            auto crashDetectionEnd = std::chrono::high_resolution_clock::now();
+            float detectionTime = std::chrono::duration<float, std::milli>(
+                crashDetectionEnd - crashDetectionStart).count();
+
+            crashDetected = true;
+            crashFrame = frame;
+
+            std::cout << "\n💥 CRASH DETECTED at frame " << frame << "\n";
+            std::cout << "   Exception: " << e.what() << "\n";
+            std::cout << "   Detection time: " << detectionTime << "ms\n\n";
+
+            metrics.recordCrash("runtime_error at frame " + std::to_string(frame));
+            reporter.addMetric("crash_detection_time_ms", detectionTime);
+
+            break;
+        }
+    }
+
+    ASSERT_TRUE(crashDetected, "Crash should have been detected");
+    ASSERT_EQ(crashFrame, 60, "Crash should occur at frame 60");
+    reporter.addAssertion("crash_detected", crashDetected);
+    reporter.addAssertion("crash_at_expected_frame", crashFrame == 60);
+
+    // === PHASE 2: Extract state before recovery ===
+    std::cout << "Phase 2: Extracting state before recovery...\n";
+
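+    // extractModule() is assumed here to remove the registered module from the system
+    // and hand back ownership, so the crashed module's state can be inspected before
+    // it is passed to ModuleLoader::reload() in Phase 3. The reload is assumed to carry
+    // the state across via getState()/setState(); the auto-recovery logic itself lives
+    // in ErrorRecoveryModule::setState() (see the module diff below).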
+    auto crashedModule = moduleSystem->extractModule();
+    auto preRecoveryState = crashedModule->getState();
+
+    auto* jsonNodeBefore = dynamic_cast<const JsonDataNode*>(preRecoveryState.get());
+    if (!jsonNodeBefore) {
+        std::cerr << "❌ Failed to extract state before recovery\n";
+        return 1;
+    }
+
+    const auto& stateBefore = jsonNodeBefore->getJsonData();
+    int frameCountBefore = stateBefore.value("frameCount", 0);
+    int crashCountBefore = stateBefore.value("crashCount", 0);
+    bool hasCrashedBefore = stateBefore.value("hasCrashed", false);
+
+    std::cout << "  State before recovery:\n";
+    std::cout << "    Frame count: " << frameCountBefore << "\n";
+    std::cout << "    Crash count: " << crashCountBefore << "\n";
+    std::cout << "    Has crashed: " << (hasCrashedBefore ? "YES" : "NO") << "\n\n";
+
+    ASSERT_TRUE(hasCrashedBefore, "Module should be in crashed state");
+
+    // === PHASE 3: Trigger hot-reload (recovery) ===
+    std::cout << "Phase 3: Triggering hot-reload for recovery...\n";
+
+    auto recoveryStart = std::chrono::high_resolution_clock::now();
+
+    // Hot-reload via ModuleLoader
+    auto recoveredModule = loader.reload(std::move(crashedModule));
+
+    auto recoveryEnd = std::chrono::high_resolution_clock::now();
+    float recoveryTime = std::chrono::duration<float, std::milli>(recoveryEnd - recoveryStart).count();
+
+    std::cout << "  ✓ Hot-reload completed in " << recoveryTime << "ms\n";
+
+    metrics.recordReloadTime(recoveryTime);
+    reporter.addMetric("recovery_time_ms", recoveryTime);
+
+    // Re-register the recovered module
+    moduleSystem->registerModule("ErrorRecoveryModule", std::move(recoveredModule));
+
+    // === PHASE 4: Verify recovery ===
+    std::cout << "\nPhase 4: Verifying recovery...\n";
+
+    auto recoveredModuleRef = moduleSystem->extractModule();
+    auto postRecoveryState = recoveredModuleRef->getState();
+
+    auto* jsonNodeAfter = dynamic_cast<const JsonDataNode*>(postRecoveryState.get());
+    if (!jsonNodeAfter) {
+        std::cerr << "❌ Failed to extract state after recovery\n";
+        return 1;
+    }
+
+    const auto& stateAfter = jsonNodeAfter->getJsonData();
+    int frameCountAfter = stateAfter.value("frameCount", 0);
+    int crashCountAfter = stateAfter.value("crashCount", 0);
+    int recoveryCountAfter = stateAfter.value("recoveryCount", 0);
+    bool hasCrashedAfter = stateAfter.value("hasCrashed", false);
+    int crashAtFrameAfter = stateAfter.value("crashAtFrame", -1);
+
+    std::cout << "  State after recovery:\n";
+    std::cout << "    Frame count: " << frameCountAfter << "\n";
+    std::cout << "    Crash count: " << crashCountAfter << "\n";
+    std::cout << "    Recovery count: " << recoveryCountAfter << "\n";
+    std::cout << "    Has crashed: " << (hasCrashedAfter ? "YES" : "NO") << "\n";
+    std::cout << "    Crash trigger: " << crashAtFrameAfter << "\n\n";
+
+    // Recovery checks
+    ASSERT_EQ(frameCountAfter, frameCountBefore, "Frame count should be preserved");
+    ASSERT_FALSE(hasCrashedAfter, "Module should no longer be in crashed state");
+    ASSERT_EQ(recoveryCountAfter, 1, "Recovery count should be 1");
+    ASSERT_EQ(crashAtFrameAfter, -1, "Crash trigger should be disabled");
+
+    reporter.addAssertion("frame_count_preserved", frameCountAfter == frameCountBefore);
+    reporter.addAssertion("crash_state_cleared", !hasCrashedAfter);
+    reporter.addAssertion("recovery_count_incremented", recoveryCountAfter == 1);
+    reporter.addAssertion("crash_trigger_disabled", crashAtFrameAfter == -1);
+
+    std::cout << "  ✅ RECOVERY SUCCESSFUL - Module is healthy again\n\n";
+
+    // Re-register for phase 5
+    moduleSystem->registerModule("ErrorRecoveryModule", std::move(recoveredModuleRef));
+
+    // === PHASE 5: Continue execution (stability check) ===
+    std::cout << "Phase 5: Stability check - Running 120 more frames...\n";
+
+    bool stableExecution = true;
+    int framesAfterRecovery = 0;
+
+    for (int frame = 1; frame <= 120; frame++) {
+        try {
+            auto frameStart = std::chrono::high_resolution_clock::now();
+
+            moduleSystem->processModules(1.0f / 60.0f);
+
+            auto frameEnd = std::chrono::high_resolution_clock::now();
+            float frameTime = std::chrono::duration<float, std::milli>(frameEnd - frameStart).count();
+            metrics.recordFPS(1000.0f / frameTime);
+
+            framesAfterRecovery++;
+
+            if (frame % 30 == 0) {
+                std::cout << "  Frame " << frame << "/120 - Stable\n";
+            }
+
+        } catch (const std::exception& e) {
+            std::cout << "\n❌ UNEXPECTED CRASH after recovery at frame " << frame << "\n";
+            std::cout << "   Exception: " << e.what() << "\n";
+            stableExecution = false;
+            break;
+        }
+    }
+
+    ASSERT_TRUE(stableExecution, "Module should execute stably after recovery");
+    ASSERT_EQ(framesAfterRecovery, 120, "Should complete all 120 frames");
+
+    reporter.addAssertion("stable_after_recovery", stableExecution);
+    reporter.addMetric("frames_after_recovery", static_cast<float>(framesAfterRecovery));
+
+    std::cout << "  ✅ Stability verified - " << framesAfterRecovery << " frames executed without issues\n\n";
+
+    // === FINAL CHECKS ===
+    std::cout << "Final verifications...\n";
+
+    // Memory growth
+    size_t memGrowth = metrics.getMemoryGrowth();
+    float memGrowthMB = memGrowth / (1024.0f * 1024.0f);
+    ASSERT_LT(memGrowthMB, 10.0f, "Memory growth should be < 10MB");
+    reporter.addMetric("memory_growth_mb", memGrowthMB);
+
+    // FPS (less strict for a recovery test - the focus is stability)
+    float minFPS = metrics.getFPSMin();
+    ASSERT_GT(minFPS, 5.0f, "Min FPS should be > 5 (recovery test allows slower frames)");
+    reporter.addMetric("fps_min", minFPS);
+    reporter.addMetric("fps_avg", metrics.getFPSAvg());
+
+    // Recovery time threshold
+    ASSERT_LT(recoveryTime, 500.0f, "Recovery time should be < 500ms");
+
+    // Crash count
+    int totalCrashes = metrics.getCrashCount();
+    ASSERT_EQ(totalCrashes, 1, "Should have exactly 1 controlled crash");
+    reporter.addMetric("total_crashes", static_cast<float>(totalCrashes));
+
+    // === REPORTS ===
+    std::cout << "\n";
+    std::cout << "Summary:\n";
+    std::cout << "  🎯 Crash detected at frame " << crashFrame << " (expected: 60)\n";
+    std::cout << "  🔄 Recovery time: " << recoveryTime << "ms\n";
+    std::cout << "  ✅ Stable execution: " << framesAfterRecovery << " frames after recovery\n";
+    std::cout << "  💾 Memory growth: " << memGrowthMB << " MB\n";
+    std::cout << "  📊 FPS: min=" << minFPS << ", avg=" << metrics.getFPSAvg() << "\n\n";
+
+    metrics.printReport();
+    reporter.printFinalReport();
+
+    return reporter.getExitCode();
+}
diff --git a/tests/modules/ErrorRecoveryModule.cpp b/tests/modules/ErrorRecoveryModule.cpp
new file mode 100644
index 0000000..e374af4
--- /dev/null
+++ b/tests/modules/ErrorRecoveryModule.cpp
@@ -0,0 +1,196 @@
+#include "ErrorRecoveryModule.h"
+#include "grove/JsonDataNode.h"
+#include <spdlog/spdlog.h>
+#include <spdlog/sinks/stdout_color_sinks.h>
+#include <stdexcept>
+#include <string>
+
+namespace grove {
+
+void ErrorRecoveryModule::setConfiguration(const IDataNode& configNode, IIO* io, ITaskScheduler* scheduler) {
+    // Logger
+    logger = spdlog::get("ErrorRecoveryModule");
+    if (!logger) {
+        logger = spdlog::stdout_color_mt("ErrorRecoveryModule");
+    }
+    logger->set_level(spdlog::level::debug);
+
+    // Clone config
+    const auto* jsonConfigNode = dynamic_cast<const JsonDataNode*>(&configNode);
+    if (jsonConfigNode) {
+        config = std::make_unique<JsonDataNode>("config", jsonConfigNode->getJsonData());
+    } else {
+        config = std::make_unique<JsonDataNode>("config");
+    }
+
+    // Read configuration
+    crashAtFrame = configNode.getInt("crashAtFrame", -1);
+    crashType = configNode.getInt("crashType", 0);
+    enableAutoRecovery = configNode.getBool("enableAutoRecovery", true);
+    versionTag = configNode.getString("versionTag", "v1.0");
+
+    logger->info("Initializing ErrorRecoveryModule");
+    logger->info("  Version: {}", versionTag);
+    logger->info("  Crash at frame: {}", crashAtFrame);
+    logger->info("  Crash type: {}", crashType);
+    logger->info("  Auto-recovery enabled: {}", enableAutoRecovery);
+
+    frameCount = 0;
+    crashCount = 0;
+    recoveryCount = 0;
+    hasCrashed = false;
+}
+
+const IDataNode& ErrorRecoveryModule::getConfiguration() {
+    return *config;
+}
+
+void ErrorRecoveryModule::process(const IDataNode& input) {
+    isProcessing = true;
+    frameCount++;
+
+    // If a crash is scheduled for this exact frame
+    if (crashAtFrame > 0 && frameCount == crashAtFrame) {
+        triggerConfiguredCrash();
+    }
+
+    isProcessing = false;
+}
+
+void ErrorRecoveryModule::triggerConfiguredCrash() {
+    crashCount++;
+    hasCrashed = true;
+
+    logger->warn("💥 CRASH TRIGGERED at frame {}", frameCount);
+    logger->warn("  Crash type: {}", crashType);
+
+    switch (crashType) {
+        case 0:
+            logger->error("Throwing runtime_error");
+            throw std::runtime_error("CRASH: Controlled runtime error at frame " + std::to_string(frameCount));
+
+        case 1:
+            logger->error("Throwing logic_error");
+            throw std::logic_error("CRASH: Logic error at frame " + std::to_string(frameCount));
+
+        case 2:
+            logger->error("Throwing out_of_range");
+            throw std::out_of_range("CRASH: Out of range at frame " + std::to_string(frameCount));
+
+        case 3:
+            logger->error("Throwing domain_error");
+            throw std::domain_error("CRASH: Domain error at frame " + std::to_string(frameCount));
+
+        default:
+            logger->error("Unknown crash type, defaulting to runtime_error");
+            throw std::runtime_error("CRASH: Unknown crash type at frame " + std::to_string(frameCount));
+    }
+}
+
+std::unique_ptr<IDataNode> ErrorRecoveryModule::getHealthStatus() {
+    nlohmann::json healthJson;
+    healthJson["status"] = hasCrashed ? "crashed" : "healthy";
+    healthJson["frameCount"] = frameCount;
+    healthJson["crashCount"] = crashCount;
+    healthJson["recoveryCount"] = recoveryCount;
+    healthJson["versionTag"] = versionTag;
+    return std::make_unique<JsonDataNode>("health", healthJson);
+}
+
+void ErrorRecoveryModule::shutdown() {
+    logger->info("Shutting down ErrorRecoveryModule");
+    logger->info("  Version: {}", versionTag);
+    logger->info("  Total frames: {}", frameCount);
+    logger->info("  Crashes: {}", crashCount);
+    logger->info("  Recoveries: {}", recoveryCount);
+}
+
+std::string ErrorRecoveryModule::getType() const {
+    return "error-recovery";
+}
+
+std::unique_ptr<IDataNode> ErrorRecoveryModule::getState() {
+    nlohmann::json json;
+    json["frameCount"] = frameCount;
+    json["crashCount"] = crashCount;
+    json["recoveryCount"] = recoveryCount;
+    json["hasCrashed"] = hasCrashed;
+    json["versionTag"] = versionTag;
+    json["crashAtFrame"] = crashAtFrame;
+
+    return std::make_unique<JsonDataNode>("state", json);
+}
+
+void ErrorRecoveryModule::setState(const IDataNode& state) {
+    const auto* jsonNode = dynamic_cast<const JsonDataNode*>(&state);
+    if (!jsonNode) {
+        if (logger) {
+            logger->error("setState: Invalid state (not JsonDataNode)");
+        }
+        return;
+    }
+
+    const auto& json = jsonNode->getJsonData();
+
+    // Ensure logger is initialized (needed after hot-reload)
+    if (!logger) {
+        logger = spdlog::get("ErrorRecoveryModule");
+        if (!logger) {
+            logger = spdlog::stdout_color_mt("ErrorRecoveryModule");
+        }
+    }
+
+    // Ensure config is initialized (needed after hot-reload)
+    if (!config) {
+        config = std::make_unique<JsonDataNode>("config");
+    }
+
+    // AUTO-RECOVERY: if the module had crashed and auto-recovery is enabled
+    bool hadCrashed = json.value("hasCrashed", false);
+    if (hadCrashed && enableAutoRecovery) {
+        logger->warn("🔄 AUTO-RECOVERY TRIGGERED");
+        logger->warn("  Module had crashed before reload");
+        logger->warn("  Applying recovery strategy...");
+
+        // Restore the state but reset the crash flag
+        frameCount = json.value("frameCount", 0);
+        crashCount = json.value("crashCount", 0);
+        recoveryCount = json.value("recoveryCount", 0) + 1;  // increment recovery count
+        hasCrashed = false;  // RECOVERY: no longer in a crashed state
+
+        // Disable the scheduled crash to avoid crashing again
+        crashAtFrame = -1;
+
+        versionTag = json.value("versionTag", "v1.0");
+
+        logger->info("✅ RECOVERY SUCCESSFUL");
+        logger->info("  Frame count preserved: {}", frameCount);
+        logger->info("  Recovery count: {}", recoveryCount);
+        logger->info("  Crash trigger disabled");
+        return;
+    }
+
+    // Normal state (no crash)
+    frameCount = json.value("frameCount", 0);
+    crashCount = json.value("crashCount", 0);
+    recoveryCount = json.value("recoveryCount", 0);
+    hasCrashed = json.value("hasCrashed", false);
+    versionTag = json.value("versionTag", "v1.0");
+    crashAtFrame = json.value("crashAtFrame", -1);
+
+    logger->info("State restored: frame {}, crashes {}, recoveries {}, version {}",
+                 frameCount, crashCount, recoveryCount, versionTag);
+}
+
+} // namespace grove
+
+// Export symbols
+extern "C" {
+    grove::IModule* createModule() {
+        return new grove::ErrorRecoveryModule();
+    }
+
+    void destroyModule(grove::IModule* module) {
+        delete module;
+    }
+}
diff --git a/tests/modules/ErrorRecoveryModule.h b/tests/modules/ErrorRecoveryModule.h
new file mode 100644
index 0000000..be4071a
--- /dev/null
+++ b/tests/modules/ErrorRecoveryModule.h
@@ -0,0 +1,60 @@
+#pragma once
+#include "grove/IModule.h"
+#include "grove/IDataNode.h"
+#include <spdlog/spdlog.h>
+#include <memory>
+
+namespace grove {
+
+/**
+ * ErrorRecoveryModule - Test module for validating the recovery system
+ *
+ * Unlike ChaosModule (random), this module triggers crashes in a CONTROLLED
+ * way through its configuration:
+ *
+ * - crashAtFrame: specific frame at which to crash
+ * - crashType: type of crash (runtime_error, logic_error, etc.)
+ * - enableAutoRecovery: if true, the module can "heal" itself after a reload
+ * - versionTag: version tag used to validate the hot-reload
+ */
+class ErrorRecoveryModule : public IModule {
+public:
+    // IModule interface
+    void process(const IDataNode& input) override;
+    void setConfiguration(const IDataNode& configNode, IIO* io, ITaskScheduler* scheduler) override;
+    const IDataNode& getConfiguration() override;
+    std::unique_ptr<IDataNode> getHealthStatus() override;
+    void shutdown() override;
+    std::unique_ptr<IDataNode> getState() override;
+    void setState(const IDataNode& state) override;
+    std::string getType() const override;
+    bool isIdle() const override { return !isProcessing; }
+
+private:
+    // Module state
+    int frameCount = 0;
+    int crashCount = 0;
+    int recoveryCount = 0;
+    bool isProcessing = false;
+    bool hasCrashed = false;
+
+    // Configuration
+    int crashAtFrame = -1;            // -1 = no crash scheduled
+    int crashType = 0;                // 0=runtime_error, 1=logic_error, 2=out_of_range, 3=domain_error
+    bool enableAutoRecovery = true;
+    std::string versionTag = "v1.0";
+
+    std::shared_ptr<spdlog::logger> logger;
+    std::unique_ptr<IDataNode> config;
+
+    // Triggers the configured crash
+    void triggerConfiguredCrash();
+};
+
+} // namespace grove
+
+// Export symbols
+extern "C" {
+    grove::IModule* createModule();
+    void destroyModule(grove::IModule* module);
+}