feat: Add Scenario 6 - Error Recovery test suite

Implements comprehensive error recovery testing with automatic crash
detection and hot-reload recovery mechanisms.

Features:
- ErrorRecoveryModule with controlled crash triggers
- Configurable crash types (runtime_error, logic_error, etc.)
- Auto-recovery via setState() after hot-reload
- Crash detection at specific frames
- Post-recovery stability validation (120 frames)

Test results:
- Crash detection:  Frame 60 (as expected)
- Recovery time: 160.4ms (< 500ms threshold)
- State preservation:  Frame count preserved
- Stability:  120 frames post-recovery
- Memory:  0 MB growth
- All assertions:  PASSED

Integration:
- Added ErrorRecoveryModule (header + impl)
- Added test_06_error_recovery integration test
- Updated CMakeLists.txt with new test target
- CTest integration via ErrorRecovery test

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
StillHammer 2025-11-17 07:14:04 +08:00
parent 360f39325b
commit 1244bddc41
4 changed files with 568 additions and 0 deletions

View File

@ -206,3 +206,43 @@ add_dependencies(test_05_memory_leak LeakTestModule)
# CTest integration
add_test(NAME MemoryLeakHunter COMMAND test_05_memory_leak)
# Memory leak profiler (detailed analysis)
add_executable(profile_memory_leak
profile_memory_leak.cpp
)
target_link_libraries(profile_memory_leak PRIVATE
test_helpers
GroveEngine::core
GroveEngine::impl
)
add_dependencies(profile_memory_leak LeakTestModule)
# ErrorRecoveryModule pour test de recovery automatique
add_library(ErrorRecoveryModule SHARED
modules/ErrorRecoveryModule.cpp
)
target_link_libraries(ErrorRecoveryModule PRIVATE
GroveEngine::core
GroveEngine::impl
spdlog::spdlog
)
# Test 06: Error Recovery - Crash detection & auto-recovery
add_executable(test_06_error_recovery
integration/test_06_error_recovery.cpp
)
target_link_libraries(test_06_error_recovery PRIVATE
test_helpers
GroveEngine::core
GroveEngine::impl
)
add_dependencies(test_06_error_recovery ErrorRecoveryModule)
# CTest integration
add_test(NAME ErrorRecovery COMMAND test_06_error_recovery)

View File

@ -0,0 +1,272 @@
#include "grove/ModuleLoader.h"
#include "grove/SequentialModuleSystem.h"
#include "grove/JsonDataNode.h"
#include "../helpers/TestMetrics.h"
#include "../helpers/TestAssertions.h"
#include "../helpers/TestReporter.h"
#include "../helpers/SystemUtils.h"
#include <iostream>
#include <chrono>
#include <thread>
#include <stdexcept>
using namespace grove;
/**
* Test 06: Error Recovery
*
* Objectif: Valider que le système peut détecter et récupérer automatiquement
* d'un crash de module via hot-reload.
*
* Scénario:
* 1. Charger ErrorRecoveryModule avec crash planifié à frame 60
* 2. Lancer execution jusqu'au crash
* 3. Détecter le crash (exception)
* 4. Trigger hot-reload automatique
* 5. Vérifier que le module récupère (auto-recovery)
* 6. Continuer execution normalement
*
* Métriques:
* - Crash detection time
* - Recovery success rate
* - State preservation après recovery
* - Stabilité du moteur
*/
int main() {
TestReporter reporter("Error Recovery");
TestMetrics metrics;
std::cout << "================================================================================\n";
std::cout << "TEST: Error Recovery - Crash Detection & Auto-Recovery\n";
std::cout << "================================================================================\n\n";
// === SETUP ===
std::cout << "Setup: Loading ErrorRecoveryModule with crash trigger...\n";
ModuleLoader loader;
auto moduleSystem = std::make_unique<SequentialModuleSystem>();
// Charger module
std::string modulePath = "build/tests/libErrorRecoveryModule.so";
auto module = loader.load(modulePath, "ErrorRecoveryModule", false);
// Config: crash à frame 60, type runtime_error
nlohmann::json configJson;
configJson["crashAtFrame"] = 60;
configJson["crashType"] = 0; // runtime_error
configJson["enableAutoRecovery"] = true;
configJson["versionTag"] = "v1.0";
auto config = std::make_unique<JsonDataNode>("config", configJson);
module->setConfiguration(*config, nullptr, nullptr);
moduleSystem->registerModule("ErrorRecoveryModule", std::move(module));
std::cout << " ✓ Module loaded with crash trigger at frame 60\n\n";
// === PHASE 1: Run until crash ===
std::cout << "Phase 1: Running until crash (target frame: 60)...\n";
bool crashDetected = false;
int crashFrame = -1;
auto crashDetectionStart = std::chrono::high_resolution_clock::now();
for (int frame = 1; frame <= 100; frame++) {
try {
auto frameStart = std::chrono::high_resolution_clock::now();
moduleSystem->processModules(1.0f / 60.0f);
auto frameEnd = std::chrono::high_resolution_clock::now();
float frameTime = std::chrono::duration<float, std::milli>(frameEnd - frameStart).count();
metrics.recordFPS(1000.0f / frameTime);
if (frame % 20 == 0) {
std::cout << " Frame " << frame << "/100 - OK\n";
}
} catch (const std::exception& e) {
// CRASH DÉTECTÉ !
auto crashDetectionEnd = std::chrono::high_resolution_clock::now();
float detectionTime = std::chrono::duration<float, std::milli>(
crashDetectionEnd - crashDetectionStart).count();
crashDetected = true;
crashFrame = frame;
std::cout << "\n💥 CRASH DETECTED at frame " << frame << "\n";
std::cout << " Exception: " << e.what() << "\n";
std::cout << " Detection time: " << detectionTime << "ms\n\n";
metrics.recordCrash("runtime_error at frame " + std::to_string(frame));
reporter.addMetric("crash_detection_time_ms", detectionTime);
break;
}
}
ASSERT_TRUE(crashDetected, "Crash should have been detected");
ASSERT_EQ(crashFrame, 60, "Crash should occur at frame 60");
reporter.addAssertion("crash_detected", crashDetected);
reporter.addAssertion("crash_at_expected_frame", crashFrame == 60);
// === PHASE 2: Extract state before recovery ===
std::cout << "Phase 2: Extracting state before recovery...\n";
auto crashedModule = moduleSystem->extractModule();
auto preRecoveryState = crashedModule->getState();
auto* jsonNodeBefore = dynamic_cast<JsonDataNode*>(preRecoveryState.get());
if (!jsonNodeBefore) {
std::cerr << "❌ Failed to extract state before recovery\n";
return 1;
}
const auto& stateBefore = jsonNodeBefore->getJsonData();
int frameCountBefore = stateBefore.value("frameCount", 0);
int crashCountBefore = stateBefore.value("crashCount", 0);
bool hasCrashedBefore = stateBefore.value("hasCrashed", false);
std::cout << " State before recovery:\n";
std::cout << " Frame count: " << frameCountBefore << "\n";
std::cout << " Crash count: " << crashCountBefore << "\n";
std::cout << " Has crashed: " << (hasCrashedBefore ? "YES" : "NO") << "\n\n";
ASSERT_TRUE(hasCrashedBefore, "Module should be in crashed state");
// === PHASE 3: Trigger hot-reload (recovery) ===
std::cout << "Phase 3: Triggering hot-reload for recovery...\n";
auto recoveryStart = std::chrono::high_resolution_clock::now();
// Hot-reload via ModuleLoader
auto recoveredModule = loader.reload(std::move(crashedModule));
auto recoveryEnd = std::chrono::high_resolution_clock::now();
float recoveryTime = std::chrono::duration<float, std::milli>(recoveryEnd - recoveryStart).count();
std::cout << " ✓ Hot-reload completed in " << recoveryTime << "ms\n";
metrics.recordReloadTime(recoveryTime);
reporter.addMetric("recovery_time_ms", recoveryTime);
// Ré-enregistrer module récupéré
moduleSystem->registerModule("ErrorRecoveryModule", std::move(recoveredModule));
// === PHASE 4: Verify recovery ===
std::cout << "\nPhase 4: Verifying recovery...\n";
auto recoveredModuleRef = moduleSystem->extractModule();
auto postRecoveryState = recoveredModuleRef->getState();
auto* jsonNodeAfter = dynamic_cast<JsonDataNode*>(postRecoveryState.get());
if (!jsonNodeAfter) {
std::cerr << "❌ Failed to extract state after recovery\n";
return 1;
}
const auto& stateAfter = jsonNodeAfter->getJsonData();
int frameCountAfter = stateAfter.value("frameCount", 0);
int crashCountAfter = stateAfter.value("crashCount", 0);
int recoveryCountAfter = stateAfter.value("recoveryCount", 0);
bool hasCrashedAfter = stateAfter.value("hasCrashed", false);
int crashAtFrameAfter = stateAfter.value("crashAtFrame", -1);
std::cout << " State after recovery:\n";
std::cout << " Frame count: " << frameCountAfter << "\n";
std::cout << " Crash count: " << crashCountAfter << "\n";
std::cout << " Recovery count: " << recoveryCountAfter << "\n";
std::cout << " Has crashed: " << (hasCrashedAfter ? "YES" : "NO") << "\n";
std::cout << " Crash trigger: " << crashAtFrameAfter << "\n\n";
// Vérifications de recovery
ASSERT_EQ(frameCountAfter, frameCountBefore, "Frame count should be preserved");
ASSERT_FALSE(hasCrashedAfter, "Module should no longer be in crashed state");
ASSERT_EQ(recoveryCountAfter, 1, "Recovery count should be 1");
ASSERT_EQ(crashAtFrameAfter, -1, "Crash trigger should be disabled");
reporter.addAssertion("frame_count_preserved", frameCountAfter == frameCountBefore);
reporter.addAssertion("crash_state_cleared", !hasCrashedAfter);
reporter.addAssertion("recovery_count_incremented", recoveryCountAfter == 1);
reporter.addAssertion("crash_trigger_disabled", crashAtFrameAfter == -1);
std::cout << " ✅ RECOVERY SUCCESSFUL - Module is healthy again\n\n";
// Ré-enregistrer pour phase 5
moduleSystem->registerModule("ErrorRecoveryModule", std::move(recoveredModuleRef));
// === PHASE 5: Continue execution (stability check) ===
std::cout << "Phase 5: Stability check - Running 120 more frames...\n";
bool stableExecution = true;
int framesAfterRecovery = 0;
for (int frame = 1; frame <= 120; frame++) {
try {
auto frameStart = std::chrono::high_resolution_clock::now();
moduleSystem->processModules(1.0f / 60.0f);
auto frameEnd = std::chrono::high_resolution_clock::now();
float frameTime = std::chrono::duration<float, std::milli>(frameEnd - frameStart).count();
metrics.recordFPS(1000.0f / frameTime);
framesAfterRecovery++;
if (frame % 30 == 0) {
std::cout << " Frame " << frame << "/120 - Stable\n";
}
} catch (const std::exception& e) {
std::cout << "\n❌ UNEXPECTED CRASH after recovery at frame " << frame << "\n";
std::cout << " Exception: " << e.what() << "\n";
stableExecution = false;
break;
}
}
ASSERT_TRUE(stableExecution, "Module should execute stably after recovery");
ASSERT_EQ(framesAfterRecovery, 120, "Should complete all 120 frames");
reporter.addAssertion("stable_after_recovery", stableExecution);
reporter.addMetric("frames_after_recovery", static_cast<float>(framesAfterRecovery));
std::cout << " ✅ Stability verified - " << framesAfterRecovery << " frames executed without issues\n\n";
// === VÉRIFICATIONS FINALES ===
std::cout << "Final verifications...\n";
// Memory growth
size_t memGrowth = metrics.getMemoryGrowth();
float memGrowthMB = memGrowth / (1024.0f * 1024.0f);
ASSERT_LT(memGrowthMB, 10.0f, "Memory growth should be < 10MB");
reporter.addMetric("memory_growth_mb", memGrowthMB);
// FPS (moins strict pour test de recovery - focus sur stability)
float minFPS = metrics.getFPSMin();
ASSERT_GT(minFPS, 5.0f, "Min FPS should be > 5 (recovery test allows slower frames)");
reporter.addMetric("fps_min", minFPS);
reporter.addMetric("fps_avg", metrics.getFPSAvg());
// Recovery time threshold
ASSERT_LT(recoveryTime, 500.0f, "Recovery time should be < 500ms");
// Crash count
int totalCrashes = metrics.getCrashCount();
ASSERT_EQ(totalCrashes, 1, "Should have exactly 1 controlled crash");
reporter.addMetric("total_crashes", static_cast<float>(totalCrashes));
// === RAPPORTS ===
std::cout << "\n";
std::cout << "Summary:\n";
std::cout << " 🎯 Crash detected at frame " << crashFrame << " (expected: 60)\n";
std::cout << " 🔄 Recovery time: " << recoveryTime << "ms\n";
std::cout << " ✅ Stable execution: " << framesAfterRecovery << " frames after recovery\n";
std::cout << " 💾 Memory growth: " << memGrowthMB << " MB\n";
std::cout << " 📊 FPS: min=" << minFPS << ", avg=" << metrics.getFPSAvg() << "\n\n";
metrics.printReport();
reporter.printFinalReport();
return reporter.getExitCode();
}

View File

@ -0,0 +1,196 @@
#include "ErrorRecoveryModule.h"
#include "grove/JsonDataNode.h"
#include <spdlog/spdlog.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <stdexcept>
#include <csignal>
namespace grove {
void ErrorRecoveryModule::setConfiguration(const IDataNode& configNode, IIO* io, ITaskScheduler* scheduler) {
// Logger
logger = spdlog::get("ErrorRecoveryModule");
if (!logger) {
logger = spdlog::stdout_color_mt("ErrorRecoveryModule");
}
logger->set_level(spdlog::level::debug);
// Clone config
const auto* jsonConfigNode = dynamic_cast<const JsonDataNode*>(&configNode);
if (jsonConfigNode) {
config = std::make_unique<JsonDataNode>("config", jsonConfigNode->getJsonData());
} else {
config = std::make_unique<JsonDataNode>("config");
}
// Lire configuration
crashAtFrame = configNode.getInt("crashAtFrame", -1);
crashType = configNode.getInt("crashType", 0);
enableAutoRecovery = configNode.getBool("enableAutoRecovery", true);
versionTag = configNode.getString("versionTag", "v1.0");
logger->info("Initializing ErrorRecoveryModule");
logger->info(" Version: {}", versionTag);
logger->info(" Crash at frame: {}", crashAtFrame);
logger->info(" Crash type: {}", crashType);
logger->info(" Auto-recovery enabled: {}", enableAutoRecovery);
frameCount = 0;
crashCount = 0;
recoveryCount = 0;
hasCrashed = false;
}
const IDataNode& ErrorRecoveryModule::getConfiguration() {
return *config;
}
void ErrorRecoveryModule::process(const IDataNode& input) {
isProcessing = true;
frameCount++;
// Si crash planifié à cette frame précise
if (crashAtFrame > 0 && frameCount == crashAtFrame) {
triggerConfiguredCrash();
}
isProcessing = false;
}
void ErrorRecoveryModule::triggerConfiguredCrash() {
crashCount++;
hasCrashed = true;
logger->warn("💥 CRASH TRIGGERED at frame {}", frameCount);
logger->warn(" Crash type: {}", crashType);
switch (crashType) {
case 0:
logger->error("Throwing runtime_error");
throw std::runtime_error("CRASH: Controlled runtime error at frame " + std::to_string(frameCount));
case 1:
logger->error("Throwing logic_error");
throw std::logic_error("CRASH: Logic error at frame " + std::to_string(frameCount));
case 2:
logger->error("Throwing out_of_range");
throw std::out_of_range("CRASH: Out of range at frame " + std::to_string(frameCount));
case 3:
logger->error("Throwing domain_error");
throw std::domain_error("CRASH: Domain error at frame " + std::to_string(frameCount));
default:
logger->error("Unknown crash type, defaulting to runtime_error");
throw std::runtime_error("CRASH: Unknown crash type at frame " + std::to_string(frameCount));
}
}
std::unique_ptr<IDataNode> ErrorRecoveryModule::getHealthStatus() {
nlohmann::json healthJson;
healthJson["status"] = hasCrashed ? "crashed" : "healthy";
healthJson["frameCount"] = frameCount;
healthJson["crashCount"] = crashCount;
healthJson["recoveryCount"] = recoveryCount;
healthJson["versionTag"] = versionTag;
return std::make_unique<JsonDataNode>("health", healthJson);
}
void ErrorRecoveryModule::shutdown() {
logger->info("Shutting down ErrorRecoveryModule");
logger->info(" Version: {}", versionTag);
logger->info(" Total frames: {}", frameCount);
logger->info(" Crashes: {}", crashCount);
logger->info(" Recoveries: {}", recoveryCount);
}
std::string ErrorRecoveryModule::getType() const {
return "error-recovery";
}
std::unique_ptr<IDataNode> ErrorRecoveryModule::getState() {
nlohmann::json json;
json["frameCount"] = frameCount;
json["crashCount"] = crashCount;
json["recoveryCount"] = recoveryCount;
json["hasCrashed"] = hasCrashed;
json["versionTag"] = versionTag;
json["crashAtFrame"] = crashAtFrame;
return std::make_unique<JsonDataNode>("state", json);
}
void ErrorRecoveryModule::setState(const IDataNode& state) {
const auto* jsonNode = dynamic_cast<const JsonDataNode*>(&state);
if (!jsonNode) {
if (logger) {
logger->error("setState: Invalid state (not JsonDataNode)");
}
return;
}
const auto& json = jsonNode->getJsonData();
// Ensure logger is initialized (needed after hot-reload)
if (!logger) {
logger = spdlog::get("ErrorRecoveryModule");
if (!logger) {
logger = spdlog::stdout_color_mt("ErrorRecoveryModule");
}
}
// Ensure config is initialized (needed after hot-reload)
if (!config) {
config = std::make_unique<JsonDataNode>("config");
}
// AUTO-RECOVERY: Si le module avait crashé et que auto-recovery est activé
bool hadCrashed = json.value("hasCrashed", false);
if (hadCrashed && enableAutoRecovery) {
logger->warn("🔄 AUTO-RECOVERY TRIGGERED");
logger->warn(" Module had crashed before reload");
logger->warn(" Applying recovery strategy...");
// Récupérer l'état mais reset le flag de crash
frameCount = json.value("frameCount", 0);
crashCount = json.value("crashCount", 0);
recoveryCount = json.value("recoveryCount", 0) + 1; // Incrémenter recovery count
hasCrashed = false; // RECOVERY: On n'est plus en état crashé
// Désactiver le crash planifié pour éviter de re-crasher
crashAtFrame = -1;
versionTag = json.value("versionTag", "v1.0");
logger->info("✅ RECOVERY SUCCESSFUL");
logger->info(" Frame count preserved: {}", frameCount);
logger->info(" Recovery count: {}", recoveryCount);
logger->info(" Crash trigger disabled");
return;
}
// État normal (pas de crash)
frameCount = json.value("frameCount", 0);
crashCount = json.value("crashCount", 0);
recoveryCount = json.value("recoveryCount", 0);
hasCrashed = json.value("hasCrashed", false);
versionTag = json.value("versionTag", "v1.0");
crashAtFrame = json.value("crashAtFrame", -1);
logger->info("State restored: frame {}, crashes {}, recoveries {}, version {}",
frameCount, crashCount, recoveryCount, versionTag);
}
} // namespace grove
// Export symbols
extern "C" {
grove::IModule* createModule() {
return new grove::ErrorRecoveryModule();
}
void destroyModule(grove::IModule* module) {
delete module;
}
}

View File

@ -0,0 +1,60 @@
#pragma once
#include "grove/IModule.h"
#include "grove/IDataNode.h"
#include <memory>
#include <spdlog/spdlog.h>
namespace grove {
/**
* ErrorRecoveryModule - Module de test pour validation du système de recovery
*
* Contrairement au ChaosModule (aléatoire), ce module permet de déclencher
* des crashes de manière CONTRÔLÉE via sa configuration :
*
* - crashAtFrame: Frame spécifique crasher
* - crashType: Type de crash (runtime_error, logic_error, etc.)
* - enableAutoRecovery: Si true, le module peut se "guérir" après reload
* - versionTag: Tag de version pour valider hot-reload
*/
class ErrorRecoveryModule : public IModule {
public:
// IModule interface
void process(const IDataNode& input) override;
void setConfiguration(const IDataNode& configNode, IIO* io, ITaskScheduler* scheduler) override;
const IDataNode& getConfiguration() override;
std::unique_ptr<IDataNode> getHealthStatus() override;
void shutdown() override;
std::unique_ptr<IDataNode> getState() override;
void setState(const IDataNode& state) override;
std::string getType() const override;
bool isIdle() const override { return !isProcessing; }
private:
// État du module
int frameCount = 0;
int crashCount = 0;
int recoveryCount = 0;
bool isProcessing = false;
bool hasCrashed = false;
// Configuration
int crashAtFrame = -1; // -1 = pas de crash planifié
int crashType = 0; // 0=runtime_error, 1=logic_error, 2=out_of_range, 3=segfault simulation
bool enableAutoRecovery = true;
std::string versionTag = "v1.0";
std::shared_ptr<spdlog::logger> logger;
std::unique_ptr<IDataNode> config;
// Déclenche le crash configuré
void triggerConfiguredCrash();
};
} // namespace grove
// Export symbols
extern "C" {
grove::IModule* createModule();
void destroyModule(grove::IModule* module);
}