feat: Add Scenario 6 - Error Recovery test suite
Implements comprehensive error recovery testing with automatic crash detection and hot-reload recovery mechanisms. Features: - ErrorRecoveryModule with controlled crash triggers - Configurable crash types (runtime_error, logic_error, etc.) - Auto-recovery via setState() after hot-reload - Crash detection at specific frames - Post-recovery stability validation (120 frames) Test results: - Crash detection: ✅ Frame 60 (as expected) - Recovery time: 160.4ms (< 500ms threshold) - State preservation: ✅ Frame count preserved - Stability: ✅ 120 frames post-recovery - Memory: ✅ 0 MB growth - All assertions: ✅ PASSED Integration: - Added ErrorRecoveryModule (header + impl) - Added test_06_error_recovery integration test - Updated CMakeLists.txt with new test target - CTest integration via ErrorRecovery test 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
360f39325b
commit
1244bddc41
@ -206,3 +206,43 @@ add_dependencies(test_05_memory_leak LeakTestModule)
|
|||||||
|
|
||||||
# CTest integration
|
# CTest integration
|
||||||
add_test(NAME MemoryLeakHunter COMMAND test_05_memory_leak)
|
add_test(NAME MemoryLeakHunter COMMAND test_05_memory_leak)
|
||||||
|
|
||||||
|
# Memory leak profiler (detailed analysis)
|
||||||
|
add_executable(profile_memory_leak
|
||||||
|
profile_memory_leak.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(profile_memory_leak PRIVATE
|
||||||
|
test_helpers
|
||||||
|
GroveEngine::core
|
||||||
|
GroveEngine::impl
|
||||||
|
)
|
||||||
|
|
||||||
|
add_dependencies(profile_memory_leak LeakTestModule)
|
||||||
|
|
||||||
|
# ErrorRecoveryModule pour test de recovery automatique
|
||||||
|
add_library(ErrorRecoveryModule SHARED
|
||||||
|
modules/ErrorRecoveryModule.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(ErrorRecoveryModule PRIVATE
|
||||||
|
GroveEngine::core
|
||||||
|
GroveEngine::impl
|
||||||
|
spdlog::spdlog
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test 06: Error Recovery - Crash detection & auto-recovery
|
||||||
|
add_executable(test_06_error_recovery
|
||||||
|
integration/test_06_error_recovery.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(test_06_error_recovery PRIVATE
|
||||||
|
test_helpers
|
||||||
|
GroveEngine::core
|
||||||
|
GroveEngine::impl
|
||||||
|
)
|
||||||
|
|
||||||
|
add_dependencies(test_06_error_recovery ErrorRecoveryModule)
|
||||||
|
|
||||||
|
# CTest integration
|
||||||
|
add_test(NAME ErrorRecovery COMMAND test_06_error_recovery)
|
||||||
|
|||||||
272
tests/integration/test_06_error_recovery.cpp
Normal file
272
tests/integration/test_06_error_recovery.cpp
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
#include "grove/ModuleLoader.h"
|
||||||
|
#include "grove/SequentialModuleSystem.h"
|
||||||
|
#include "grove/JsonDataNode.h"
|
||||||
|
#include "../helpers/TestMetrics.h"
|
||||||
|
#include "../helpers/TestAssertions.h"
|
||||||
|
#include "../helpers/TestReporter.h"
|
||||||
|
#include "../helpers/SystemUtils.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <chrono>
|
||||||
|
#include <thread>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
using namespace grove;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test 06: Error Recovery
|
||||||
|
*
|
||||||
|
* Objectif: Valider que le système peut détecter et récupérer automatiquement
|
||||||
|
* d'un crash de module via hot-reload.
|
||||||
|
*
|
||||||
|
* Scénario:
|
||||||
|
* 1. Charger ErrorRecoveryModule avec crash planifié à frame 60
|
||||||
|
* 2. Lancer execution jusqu'au crash
|
||||||
|
* 3. Détecter le crash (exception)
|
||||||
|
* 4. Trigger hot-reload automatique
|
||||||
|
* 5. Vérifier que le module récupère (auto-recovery)
|
||||||
|
* 6. Continuer execution normalement
|
||||||
|
*
|
||||||
|
* Métriques:
|
||||||
|
* - Crash detection time
|
||||||
|
* - Recovery success rate
|
||||||
|
* - State preservation après recovery
|
||||||
|
* - Stabilité du moteur
|
||||||
|
*/
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
TestReporter reporter("Error Recovery");
|
||||||
|
TestMetrics metrics;
|
||||||
|
|
||||||
|
std::cout << "================================================================================\n";
|
||||||
|
std::cout << "TEST: Error Recovery - Crash Detection & Auto-Recovery\n";
|
||||||
|
std::cout << "================================================================================\n\n";
|
||||||
|
|
||||||
|
// === SETUP ===
|
||||||
|
std::cout << "Setup: Loading ErrorRecoveryModule with crash trigger...\n";
|
||||||
|
|
||||||
|
ModuleLoader loader;
|
||||||
|
auto moduleSystem = std::make_unique<SequentialModuleSystem>();
|
||||||
|
|
||||||
|
// Charger module
|
||||||
|
std::string modulePath = "build/tests/libErrorRecoveryModule.so";
|
||||||
|
auto module = loader.load(modulePath, "ErrorRecoveryModule", false);
|
||||||
|
|
||||||
|
// Config: crash à frame 60, type runtime_error
|
||||||
|
nlohmann::json configJson;
|
||||||
|
configJson["crashAtFrame"] = 60;
|
||||||
|
configJson["crashType"] = 0; // runtime_error
|
||||||
|
configJson["enableAutoRecovery"] = true;
|
||||||
|
configJson["versionTag"] = "v1.0";
|
||||||
|
auto config = std::make_unique<JsonDataNode>("config", configJson);
|
||||||
|
|
||||||
|
module->setConfiguration(*config, nullptr, nullptr);
|
||||||
|
moduleSystem->registerModule("ErrorRecoveryModule", std::move(module));
|
||||||
|
|
||||||
|
std::cout << " ✓ Module loaded with crash trigger at frame 60\n\n";
|
||||||
|
|
||||||
|
// === PHASE 1: Run until crash ===
|
||||||
|
std::cout << "Phase 1: Running until crash (target frame: 60)...\n";
|
||||||
|
|
||||||
|
bool crashDetected = false;
|
||||||
|
int crashFrame = -1;
|
||||||
|
auto crashDetectionStart = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
for (int frame = 1; frame <= 100; frame++) {
|
||||||
|
try {
|
||||||
|
auto frameStart = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
moduleSystem->processModules(1.0f / 60.0f);
|
||||||
|
|
||||||
|
auto frameEnd = std::chrono::high_resolution_clock::now();
|
||||||
|
float frameTime = std::chrono::duration<float, std::milli>(frameEnd - frameStart).count();
|
||||||
|
metrics.recordFPS(1000.0f / frameTime);
|
||||||
|
|
||||||
|
if (frame % 20 == 0) {
|
||||||
|
std::cout << " Frame " << frame << "/100 - OK\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
// CRASH DÉTECTÉ !
|
||||||
|
auto crashDetectionEnd = std::chrono::high_resolution_clock::now();
|
||||||
|
float detectionTime = std::chrono::duration<float, std::milli>(
|
||||||
|
crashDetectionEnd - crashDetectionStart).count();
|
||||||
|
|
||||||
|
crashDetected = true;
|
||||||
|
crashFrame = frame;
|
||||||
|
|
||||||
|
std::cout << "\n💥 CRASH DETECTED at frame " << frame << "\n";
|
||||||
|
std::cout << " Exception: " << e.what() << "\n";
|
||||||
|
std::cout << " Detection time: " << detectionTime << "ms\n\n";
|
||||||
|
|
||||||
|
metrics.recordCrash("runtime_error at frame " + std::to_string(frame));
|
||||||
|
reporter.addMetric("crash_detection_time_ms", detectionTime);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_TRUE(crashDetected, "Crash should have been detected");
|
||||||
|
ASSERT_EQ(crashFrame, 60, "Crash should occur at frame 60");
|
||||||
|
reporter.addAssertion("crash_detected", crashDetected);
|
||||||
|
reporter.addAssertion("crash_at_expected_frame", crashFrame == 60);
|
||||||
|
|
||||||
|
// === PHASE 2: Extract state before recovery ===
|
||||||
|
std::cout << "Phase 2: Extracting state before recovery...\n";
|
||||||
|
|
||||||
|
auto crashedModule = moduleSystem->extractModule();
|
||||||
|
auto preRecoveryState = crashedModule->getState();
|
||||||
|
|
||||||
|
auto* jsonNodeBefore = dynamic_cast<JsonDataNode*>(preRecoveryState.get());
|
||||||
|
if (!jsonNodeBefore) {
|
||||||
|
std::cerr << "❌ Failed to extract state before recovery\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto& stateBefore = jsonNodeBefore->getJsonData();
|
||||||
|
int frameCountBefore = stateBefore.value("frameCount", 0);
|
||||||
|
int crashCountBefore = stateBefore.value("crashCount", 0);
|
||||||
|
bool hasCrashedBefore = stateBefore.value("hasCrashed", false);
|
||||||
|
|
||||||
|
std::cout << " State before recovery:\n";
|
||||||
|
std::cout << " Frame count: " << frameCountBefore << "\n";
|
||||||
|
std::cout << " Crash count: " << crashCountBefore << "\n";
|
||||||
|
std::cout << " Has crashed: " << (hasCrashedBefore ? "YES" : "NO") << "\n\n";
|
||||||
|
|
||||||
|
ASSERT_TRUE(hasCrashedBefore, "Module should be in crashed state");
|
||||||
|
|
||||||
|
// === PHASE 3: Trigger hot-reload (recovery) ===
|
||||||
|
std::cout << "Phase 3: Triggering hot-reload for recovery...\n";
|
||||||
|
|
||||||
|
auto recoveryStart = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
// Hot-reload via ModuleLoader
|
||||||
|
auto recoveredModule = loader.reload(std::move(crashedModule));
|
||||||
|
|
||||||
|
auto recoveryEnd = std::chrono::high_resolution_clock::now();
|
||||||
|
float recoveryTime = std::chrono::duration<float, std::milli>(recoveryEnd - recoveryStart).count();
|
||||||
|
|
||||||
|
std::cout << " ✓ Hot-reload completed in " << recoveryTime << "ms\n";
|
||||||
|
|
||||||
|
metrics.recordReloadTime(recoveryTime);
|
||||||
|
reporter.addMetric("recovery_time_ms", recoveryTime);
|
||||||
|
|
||||||
|
// Ré-enregistrer module récupéré
|
||||||
|
moduleSystem->registerModule("ErrorRecoveryModule", std::move(recoveredModule));
|
||||||
|
|
||||||
|
// === PHASE 4: Verify recovery ===
|
||||||
|
std::cout << "\nPhase 4: Verifying recovery...\n";
|
||||||
|
|
||||||
|
auto recoveredModuleRef = moduleSystem->extractModule();
|
||||||
|
auto postRecoveryState = recoveredModuleRef->getState();
|
||||||
|
|
||||||
|
auto* jsonNodeAfter = dynamic_cast<JsonDataNode*>(postRecoveryState.get());
|
||||||
|
if (!jsonNodeAfter) {
|
||||||
|
std::cerr << "❌ Failed to extract state after recovery\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto& stateAfter = jsonNodeAfter->getJsonData();
|
||||||
|
int frameCountAfter = stateAfter.value("frameCount", 0);
|
||||||
|
int crashCountAfter = stateAfter.value("crashCount", 0);
|
||||||
|
int recoveryCountAfter = stateAfter.value("recoveryCount", 0);
|
||||||
|
bool hasCrashedAfter = stateAfter.value("hasCrashed", false);
|
||||||
|
int crashAtFrameAfter = stateAfter.value("crashAtFrame", -1);
|
||||||
|
|
||||||
|
std::cout << " State after recovery:\n";
|
||||||
|
std::cout << " Frame count: " << frameCountAfter << "\n";
|
||||||
|
std::cout << " Crash count: " << crashCountAfter << "\n";
|
||||||
|
std::cout << " Recovery count: " << recoveryCountAfter << "\n";
|
||||||
|
std::cout << " Has crashed: " << (hasCrashedAfter ? "YES" : "NO") << "\n";
|
||||||
|
std::cout << " Crash trigger: " << crashAtFrameAfter << "\n\n";
|
||||||
|
|
||||||
|
// Vérifications de recovery
|
||||||
|
ASSERT_EQ(frameCountAfter, frameCountBefore, "Frame count should be preserved");
|
||||||
|
ASSERT_FALSE(hasCrashedAfter, "Module should no longer be in crashed state");
|
||||||
|
ASSERT_EQ(recoveryCountAfter, 1, "Recovery count should be 1");
|
||||||
|
ASSERT_EQ(crashAtFrameAfter, -1, "Crash trigger should be disabled");
|
||||||
|
|
||||||
|
reporter.addAssertion("frame_count_preserved", frameCountAfter == frameCountBefore);
|
||||||
|
reporter.addAssertion("crash_state_cleared", !hasCrashedAfter);
|
||||||
|
reporter.addAssertion("recovery_count_incremented", recoveryCountAfter == 1);
|
||||||
|
reporter.addAssertion("crash_trigger_disabled", crashAtFrameAfter == -1);
|
||||||
|
|
||||||
|
std::cout << " ✅ RECOVERY SUCCESSFUL - Module is healthy again\n\n";
|
||||||
|
|
||||||
|
// Ré-enregistrer pour phase 5
|
||||||
|
moduleSystem->registerModule("ErrorRecoveryModule", std::move(recoveredModuleRef));
|
||||||
|
|
||||||
|
// === PHASE 5: Continue execution (stability check) ===
|
||||||
|
std::cout << "Phase 5: Stability check - Running 120 more frames...\n";
|
||||||
|
|
||||||
|
bool stableExecution = true;
|
||||||
|
int framesAfterRecovery = 0;
|
||||||
|
|
||||||
|
for (int frame = 1; frame <= 120; frame++) {
|
||||||
|
try {
|
||||||
|
auto frameStart = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
moduleSystem->processModules(1.0f / 60.0f);
|
||||||
|
|
||||||
|
auto frameEnd = std::chrono::high_resolution_clock::now();
|
||||||
|
float frameTime = std::chrono::duration<float, std::milli>(frameEnd - frameStart).count();
|
||||||
|
metrics.recordFPS(1000.0f / frameTime);
|
||||||
|
|
||||||
|
framesAfterRecovery++;
|
||||||
|
|
||||||
|
if (frame % 30 == 0) {
|
||||||
|
std::cout << " Frame " << frame << "/120 - Stable\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
std::cout << "\n❌ UNEXPECTED CRASH after recovery at frame " << frame << "\n";
|
||||||
|
std::cout << " Exception: " << e.what() << "\n";
|
||||||
|
stableExecution = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_TRUE(stableExecution, "Module should execute stably after recovery");
|
||||||
|
ASSERT_EQ(framesAfterRecovery, 120, "Should complete all 120 frames");
|
||||||
|
|
||||||
|
reporter.addAssertion("stable_after_recovery", stableExecution);
|
||||||
|
reporter.addMetric("frames_after_recovery", static_cast<float>(framesAfterRecovery));
|
||||||
|
|
||||||
|
std::cout << " ✅ Stability verified - " << framesAfterRecovery << " frames executed without issues\n\n";
|
||||||
|
|
||||||
|
// === VÉRIFICATIONS FINALES ===
|
||||||
|
std::cout << "Final verifications...\n";
|
||||||
|
|
||||||
|
// Memory growth
|
||||||
|
size_t memGrowth = metrics.getMemoryGrowth();
|
||||||
|
float memGrowthMB = memGrowth / (1024.0f * 1024.0f);
|
||||||
|
ASSERT_LT(memGrowthMB, 10.0f, "Memory growth should be < 10MB");
|
||||||
|
reporter.addMetric("memory_growth_mb", memGrowthMB);
|
||||||
|
|
||||||
|
// FPS (moins strict pour test de recovery - focus sur stability)
|
||||||
|
float minFPS = metrics.getFPSMin();
|
||||||
|
ASSERT_GT(minFPS, 5.0f, "Min FPS should be > 5 (recovery test allows slower frames)");
|
||||||
|
reporter.addMetric("fps_min", minFPS);
|
||||||
|
reporter.addMetric("fps_avg", metrics.getFPSAvg());
|
||||||
|
|
||||||
|
// Recovery time threshold
|
||||||
|
ASSERT_LT(recoveryTime, 500.0f, "Recovery time should be < 500ms");
|
||||||
|
|
||||||
|
// Crash count
|
||||||
|
int totalCrashes = metrics.getCrashCount();
|
||||||
|
ASSERT_EQ(totalCrashes, 1, "Should have exactly 1 controlled crash");
|
||||||
|
reporter.addMetric("total_crashes", static_cast<float>(totalCrashes));
|
||||||
|
|
||||||
|
// === RAPPORTS ===
|
||||||
|
std::cout << "\n";
|
||||||
|
std::cout << "Summary:\n";
|
||||||
|
std::cout << " 🎯 Crash detected at frame " << crashFrame << " (expected: 60)\n";
|
||||||
|
std::cout << " 🔄 Recovery time: " << recoveryTime << "ms\n";
|
||||||
|
std::cout << " ✅ Stable execution: " << framesAfterRecovery << " frames after recovery\n";
|
||||||
|
std::cout << " 💾 Memory growth: " << memGrowthMB << " MB\n";
|
||||||
|
std::cout << " 📊 FPS: min=" << minFPS << ", avg=" << metrics.getFPSAvg() << "\n\n";
|
||||||
|
|
||||||
|
metrics.printReport();
|
||||||
|
reporter.printFinalReport();
|
||||||
|
|
||||||
|
return reporter.getExitCode();
|
||||||
|
}
|
||||||
196
tests/modules/ErrorRecoveryModule.cpp
Normal file
196
tests/modules/ErrorRecoveryModule.cpp
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
#include "ErrorRecoveryModule.h"
|
||||||
|
#include "grove/JsonDataNode.h"
|
||||||
|
#include <spdlog/spdlog.h>
|
||||||
|
#include <spdlog/sinks/stdout_color_sinks.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <csignal>
|
||||||
|
|
||||||
|
namespace grove {
|
||||||
|
|
||||||
|
void ErrorRecoveryModule::setConfiguration(const IDataNode& configNode, IIO* io, ITaskScheduler* scheduler) {
|
||||||
|
// Logger
|
||||||
|
logger = spdlog::get("ErrorRecoveryModule");
|
||||||
|
if (!logger) {
|
||||||
|
logger = spdlog::stdout_color_mt("ErrorRecoveryModule");
|
||||||
|
}
|
||||||
|
logger->set_level(spdlog::level::debug);
|
||||||
|
|
||||||
|
// Clone config
|
||||||
|
const auto* jsonConfigNode = dynamic_cast<const JsonDataNode*>(&configNode);
|
||||||
|
if (jsonConfigNode) {
|
||||||
|
config = std::make_unique<JsonDataNode>("config", jsonConfigNode->getJsonData());
|
||||||
|
} else {
|
||||||
|
config = std::make_unique<JsonDataNode>("config");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lire configuration
|
||||||
|
crashAtFrame = configNode.getInt("crashAtFrame", -1);
|
||||||
|
crashType = configNode.getInt("crashType", 0);
|
||||||
|
enableAutoRecovery = configNode.getBool("enableAutoRecovery", true);
|
||||||
|
versionTag = configNode.getString("versionTag", "v1.0");
|
||||||
|
|
||||||
|
logger->info("Initializing ErrorRecoveryModule");
|
||||||
|
logger->info(" Version: {}", versionTag);
|
||||||
|
logger->info(" Crash at frame: {}", crashAtFrame);
|
||||||
|
logger->info(" Crash type: {}", crashType);
|
||||||
|
logger->info(" Auto-recovery enabled: {}", enableAutoRecovery);
|
||||||
|
|
||||||
|
frameCount = 0;
|
||||||
|
crashCount = 0;
|
||||||
|
recoveryCount = 0;
|
||||||
|
hasCrashed = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const IDataNode& ErrorRecoveryModule::getConfiguration() {
|
||||||
|
return *config;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ErrorRecoveryModule::process(const IDataNode& input) {
|
||||||
|
isProcessing = true;
|
||||||
|
frameCount++;
|
||||||
|
|
||||||
|
// Si crash planifié à cette frame précise
|
||||||
|
if (crashAtFrame > 0 && frameCount == crashAtFrame) {
|
||||||
|
triggerConfiguredCrash();
|
||||||
|
}
|
||||||
|
|
||||||
|
isProcessing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ErrorRecoveryModule::triggerConfiguredCrash() {
|
||||||
|
crashCount++;
|
||||||
|
hasCrashed = true;
|
||||||
|
|
||||||
|
logger->warn("💥 CRASH TRIGGERED at frame {}", frameCount);
|
||||||
|
logger->warn(" Crash type: {}", crashType);
|
||||||
|
|
||||||
|
switch (crashType) {
|
||||||
|
case 0:
|
||||||
|
logger->error("Throwing runtime_error");
|
||||||
|
throw std::runtime_error("CRASH: Controlled runtime error at frame " + std::to_string(frameCount));
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
logger->error("Throwing logic_error");
|
||||||
|
throw std::logic_error("CRASH: Logic error at frame " + std::to_string(frameCount));
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
logger->error("Throwing out_of_range");
|
||||||
|
throw std::out_of_range("CRASH: Out of range at frame " + std::to_string(frameCount));
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
logger->error("Throwing domain_error");
|
||||||
|
throw std::domain_error("CRASH: Domain error at frame " + std::to_string(frameCount));
|
||||||
|
|
||||||
|
default:
|
||||||
|
logger->error("Unknown crash type, defaulting to runtime_error");
|
||||||
|
throw std::runtime_error("CRASH: Unknown crash type at frame " + std::to_string(frameCount));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<IDataNode> ErrorRecoveryModule::getHealthStatus() {
|
||||||
|
nlohmann::json healthJson;
|
||||||
|
healthJson["status"] = hasCrashed ? "crashed" : "healthy";
|
||||||
|
healthJson["frameCount"] = frameCount;
|
||||||
|
healthJson["crashCount"] = crashCount;
|
||||||
|
healthJson["recoveryCount"] = recoveryCount;
|
||||||
|
healthJson["versionTag"] = versionTag;
|
||||||
|
return std::make_unique<JsonDataNode>("health", healthJson);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ErrorRecoveryModule::shutdown() {
|
||||||
|
logger->info("Shutting down ErrorRecoveryModule");
|
||||||
|
logger->info(" Version: {}", versionTag);
|
||||||
|
logger->info(" Total frames: {}", frameCount);
|
||||||
|
logger->info(" Crashes: {}", crashCount);
|
||||||
|
logger->info(" Recoveries: {}", recoveryCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string ErrorRecoveryModule::getType() const {
|
||||||
|
return "error-recovery";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<IDataNode> ErrorRecoveryModule::getState() {
|
||||||
|
nlohmann::json json;
|
||||||
|
json["frameCount"] = frameCount;
|
||||||
|
json["crashCount"] = crashCount;
|
||||||
|
json["recoveryCount"] = recoveryCount;
|
||||||
|
json["hasCrashed"] = hasCrashed;
|
||||||
|
json["versionTag"] = versionTag;
|
||||||
|
json["crashAtFrame"] = crashAtFrame;
|
||||||
|
|
||||||
|
return std::make_unique<JsonDataNode>("state", json);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ErrorRecoveryModule::setState(const IDataNode& state) {
|
||||||
|
const auto* jsonNode = dynamic_cast<const JsonDataNode*>(&state);
|
||||||
|
if (!jsonNode) {
|
||||||
|
if (logger) {
|
||||||
|
logger->error("setState: Invalid state (not JsonDataNode)");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto& json = jsonNode->getJsonData();
|
||||||
|
|
||||||
|
// Ensure logger is initialized (needed after hot-reload)
|
||||||
|
if (!logger) {
|
||||||
|
logger = spdlog::get("ErrorRecoveryModule");
|
||||||
|
if (!logger) {
|
||||||
|
logger = spdlog::stdout_color_mt("ErrorRecoveryModule");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure config is initialized (needed after hot-reload)
|
||||||
|
if (!config) {
|
||||||
|
config = std::make_unique<JsonDataNode>("config");
|
||||||
|
}
|
||||||
|
|
||||||
|
// AUTO-RECOVERY: Si le module avait crashé et que auto-recovery est activé
|
||||||
|
bool hadCrashed = json.value("hasCrashed", false);
|
||||||
|
if (hadCrashed && enableAutoRecovery) {
|
||||||
|
logger->warn("🔄 AUTO-RECOVERY TRIGGERED");
|
||||||
|
logger->warn(" Module had crashed before reload");
|
||||||
|
logger->warn(" Applying recovery strategy...");
|
||||||
|
|
||||||
|
// Récupérer l'état mais reset le flag de crash
|
||||||
|
frameCount = json.value("frameCount", 0);
|
||||||
|
crashCount = json.value("crashCount", 0);
|
||||||
|
recoveryCount = json.value("recoveryCount", 0) + 1; // Incrémenter recovery count
|
||||||
|
hasCrashed = false; // RECOVERY: On n'est plus en état crashé
|
||||||
|
|
||||||
|
// Désactiver le crash planifié pour éviter de re-crasher
|
||||||
|
crashAtFrame = -1;
|
||||||
|
|
||||||
|
versionTag = json.value("versionTag", "v1.0");
|
||||||
|
|
||||||
|
logger->info("✅ RECOVERY SUCCESSFUL");
|
||||||
|
logger->info(" Frame count preserved: {}", frameCount);
|
||||||
|
logger->info(" Recovery count: {}", recoveryCount);
|
||||||
|
logger->info(" Crash trigger disabled");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// État normal (pas de crash)
|
||||||
|
frameCount = json.value("frameCount", 0);
|
||||||
|
crashCount = json.value("crashCount", 0);
|
||||||
|
recoveryCount = json.value("recoveryCount", 0);
|
||||||
|
hasCrashed = json.value("hasCrashed", false);
|
||||||
|
versionTag = json.value("versionTag", "v1.0");
|
||||||
|
crashAtFrame = json.value("crashAtFrame", -1);
|
||||||
|
|
||||||
|
logger->info("State restored: frame {}, crashes {}, recoveries {}, version {}",
|
||||||
|
frameCount, crashCount, recoveryCount, versionTag);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace grove
|
||||||
|
|
||||||
|
// Export symbols
|
||||||
|
extern "C" {
|
||||||
|
grove::IModule* createModule() {
|
||||||
|
return new grove::ErrorRecoveryModule();
|
||||||
|
}
|
||||||
|
|
||||||
|
void destroyModule(grove::IModule* module) {
|
||||||
|
delete module;
|
||||||
|
}
|
||||||
|
}
|
||||||
60
tests/modules/ErrorRecoveryModule.h
Normal file
60
tests/modules/ErrorRecoveryModule.h
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "grove/IModule.h"
|
||||||
|
#include "grove/IDataNode.h"
|
||||||
|
#include <memory>
|
||||||
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
|
namespace grove {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ErrorRecoveryModule - Module de test pour validation du système de recovery
|
||||||
|
*
|
||||||
|
* Contrairement au ChaosModule (aléatoire), ce module permet de déclencher
|
||||||
|
* des crashes de manière CONTRÔLÉE via sa configuration :
|
||||||
|
*
|
||||||
|
* - crashAtFrame: Frame spécifique où crasher
|
||||||
|
* - crashType: Type de crash (runtime_error, logic_error, etc.)
|
||||||
|
* - enableAutoRecovery: Si true, le module peut se "guérir" après reload
|
||||||
|
* - versionTag: Tag de version pour valider hot-reload
|
||||||
|
*/
|
||||||
|
class ErrorRecoveryModule : public IModule {
|
||||||
|
public:
|
||||||
|
// IModule interface
|
||||||
|
void process(const IDataNode& input) override;
|
||||||
|
void setConfiguration(const IDataNode& configNode, IIO* io, ITaskScheduler* scheduler) override;
|
||||||
|
const IDataNode& getConfiguration() override;
|
||||||
|
std::unique_ptr<IDataNode> getHealthStatus() override;
|
||||||
|
void shutdown() override;
|
||||||
|
std::unique_ptr<IDataNode> getState() override;
|
||||||
|
void setState(const IDataNode& state) override;
|
||||||
|
std::string getType() const override;
|
||||||
|
bool isIdle() const override { return !isProcessing; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// État du module
|
||||||
|
int frameCount = 0;
|
||||||
|
int crashCount = 0;
|
||||||
|
int recoveryCount = 0;
|
||||||
|
bool isProcessing = false;
|
||||||
|
bool hasCrashed = false;
|
||||||
|
|
||||||
|
// Configuration
|
||||||
|
int crashAtFrame = -1; // -1 = pas de crash planifié
|
||||||
|
int crashType = 0; // 0=runtime_error, 1=logic_error, 2=out_of_range, 3=segfault simulation
|
||||||
|
bool enableAutoRecovery = true;
|
||||||
|
std::string versionTag = "v1.0";
|
||||||
|
|
||||||
|
std::shared_ptr<spdlog::logger> logger;
|
||||||
|
std::unique_ptr<IDataNode> config;
|
||||||
|
|
||||||
|
// Déclenche le crash configuré
|
||||||
|
void triggerConfiguredCrash();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace grove
|
||||||
|
|
||||||
|
// Export symbols
|
||||||
|
extern "C" {
|
||||||
|
grove::IModule* createModule();
|
||||||
|
void destroyModule(grove::IModule* module);
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user