From 37b62b55e893be89cc48b4c8670a9127bb49387e Mon Sep 17 00:00:00 2001
From: StillHammer
Date: Thu, 27 Nov 2025 14:01:25 +0800
Subject: [PATCH] feat: Implement FileSystem tools for agentic LLM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Claude Code style file manipulation tools:
- read_file: Read file content with line numbers (offset/limit support)
- write_file: Create or overwrite files
- edit_file: Replace exact string in file (unique match or replace_all)
- list_directory: List directory contents with type/size
- glob_files: Search files by pattern (**/*.cpp)
- grep_files: Search content with regex

Features:
- Security: configurable allowed paths, blocked patterns (*.env, *.key)
- Size limits: 1MB read, 10MB write
- Path canonicalization to prevent traversal attacks
- Integrated into LLMService tool registry

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 CMakeLists.txt                       |   3 +-
 src/services/LLMService.cpp          |  18 +-
 src/services/LLMService.hpp          |   1 +
 src/shared/tools/FileSystemTools.cpp | 679 +++++++++++++++++++++++++++
 src/shared/tools/FileSystemTools.hpp |  74 ++++
 5 files changed, 773 insertions(+), 2 deletions(-)
 create mode 100644 src/shared/tools/FileSystemTools.cpp
 create mode 100644 src/shared/tools/FileSystemTools.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8fbd21d..e0920b8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -73,9 +73,10 @@ if(OPENSSL_FOUND)
     target_compile_definitions(AissiaLLM PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
 endif()
 
-# Tools Library (Internal tools + MCP client)
+# Tools Library (Internal tools + FileSystem tools + MCP client)
 add_library(AissiaTools STATIC
     src/shared/tools/InternalTools.cpp
+    src/shared/tools/FileSystemTools.cpp
     src/shared/mcp/StdioTransport.cpp
     src/shared/mcp/MCPClient.cpp
 )
diff --git a/src/services/LLMService.cpp b/src/services/LLMService.cpp
index 7cd4bd1..b531f1f 100644
--- a/src/services/LLMService.cpp
+++ b/src/services/LLMService.cpp
@@ -83,7 +83,23 @@ void LLMService::initializeTools() {
         m_logger->info("Registered {} internal tools", m_internalTools->size());
     }
 
-    // 2. MCP tools (via external servers)
+    // 2. FileSystem tools (direct C++ execution)
+    // Build the definitions once; they are reused for the log line below
+    const auto fileSystemToolDefs = tools::FileSystemTools::getToolDefinitions();
+    for (const auto& toolDef : fileSystemToolDefs) {
+        std::string toolName = toolDef["name"].get<std::string>();
+        m_toolRegistry.registerTool(
+            toolName,
+            toolDef["description"].get<std::string>(),
+            toolDef["input_schema"],
+            [toolName](const nlohmann::json& input) -> nlohmann::json {
+                return tools::FileSystemTools::execute(toolName, input);
+            }
+        );
+    }
+    m_logger->info("Registered {} filesystem tools", fileSystemToolDefs.size());
+
+    // 3. MCP tools (via external servers)
     m_mcpClient = std::make_unique();
     if (loadMCPConfig("config/mcp.json")) {
         int connected = m_mcpClient->connectAll();
diff --git a/src/services/LLMService.hpp b/src/services/LLMService.hpp
index 4929ab8..d255ad0 100644
--- a/src/services/LLMService.hpp
+++ b/src/services/LLMService.hpp
@@ -4,6 +4,7 @@
 #include "../shared/llm/ILLMProvider.hpp"
 #include "../shared/llm/ToolRegistry.hpp"
 #include "../shared/tools/InternalTools.hpp"
+#include "../shared/tools/FileSystemTools.hpp"
 #include "../shared/mcp/MCPClient.hpp"
 
 #include
diff --git a/src/shared/tools/FileSystemTools.cpp b/src/shared/tools/FileSystemTools.cpp
new file mode 100644
index 0000000..26c36f2
--- /dev/null
+++ b/src/shared/tools/FileSystemTools.cpp
@@ -0,0 +1,679 @@
+#include "FileSystemTools.hpp"
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <regex>
+#include <sstream>
+#include <system_error>
+
+namespace fs = std::filesystem;
+
+namespace aissia::tools {
+
+// Static configuration defaults
+std::vector<std::string> FileSystemTools::s_allowedPaths = {};
+std::vector<std::string> FileSystemTools::s_blockedPatterns = {"*.env", "*credentials*", "*.key", "*.pem"};
+size_t FileSystemTools::s_maxReadSize = 1024 * 1024;        // 1MB
+size_t FileSystemTools::s_maxWriteSize = 10 * 1024 * 1024;  // 10MB
+
+namespace {
+
+// Caps on result sizes so one tool call cannot flood the LLM context
+constexpr size_t kMaxGlobMatches = 100;
+constexpr size_t kMaxGrepMatches = 50;
+constexpr size_t kMaxGrepLineLength = 200;
+
+/**
+ * @brief Translate a glob pattern into ECMAScript regex source
+ *
+ * "**" followed by '/' matches zero or more whole directories, a bare "**"
+ * matches anything, "*" and "?" never cross a '/', and every regex
+ * metacharacter is escaped so that it only matches itself.
+ */
+std::string globToRegex(const std::string& glob) {
+    static const std::string kRegexMeta = "\\.+^$|()[]{}";
+
+    std::string out;
+    out.reserve(glob.size() * 2);
+    for (size_t i = 0; i < glob.size(); ++i) {
+        const char c = glob[i];
+        if (c == '*') {
+            const bool doubleStar = i + 1 < glob.size() && glob[i + 1] == '*';
+            if (!doubleStar) {
+                out += "[^/]*";
+                continue;
+            }
+            ++i;  // consume the second '*'
+            if (i + 1 < glob.size() && glob[i + 1] == '/') {
+                ++i;  // consume the '/': "**/" may also match no directory at all
+                out += "(?:.*/)?";
+            } else {
+                out += ".*";
+            }
+        } else if (c == '?') {
+            out += "[^/]";
+        } else {
+            if (kRegexMeta.find(c) != std::string::npos) {
+                out += '\\';
+            }
+            out += c;
+        }
+    }
+    return out;
+}
+
+} // namespace
+
+// ============================================================================
+// Tool Definitions
+// ============================================================================
+
+std::vector<json> FileSystemTools::getToolDefinitions() {
+    return {
+        {
+            {"name", "read_file"},
+            {"description", "Read content from a file. Returns file content with optional line offset and limit."},
+            {"input_schema", {
+                {"type", "object"},
+                {"properties", {
+                    {"path", {{"type", "string"}, {"description", "Absolute path to the file"}}},
+                    {"offset", {{"type", "integer"}, {"description", "Line number to start from (0-based)"}}},
+                    {"limit", {{"type", "integer"}, {"description", "Maximum number of lines to read"}}}
+                }},
+                {"required", json::array({"path"})}
+            }}
+        },
+        {
+            {"name", "write_file"},
+            {"description", "Write content to a file. Creates the file if it doesn't exist, overwrites if it does."},
+            {"input_schema", {
+                {"type", "object"},
+                {"properties", {
+                    {"path", {{"type", "string"}, {"description", "Absolute path to the file"}}},
+                    {"content", {{"type", "string"}, {"description", "Content to write to the file"}}}
+                }},
+                {"required", json::array({"path", "content"})}
+            }}
+        },
+        {
+            {"name", "edit_file"},
+            {"description", "Replace an exact string in a file. The old_string must be unique in the file."},
+            {"input_schema", {
+                {"type", "object"},
+                {"properties", {
+                    {"path", {{"type", "string"}, {"description", "Absolute path to the file"}}},
+                    {"old_string", {{"type", "string"}, {"description", "The exact string to find and replace"}}},
+                    {"new_string", {{"type", "string"}, {"description", "The string to replace with"}}},
+                    {"replace_all", {{"type", "boolean"}, {"description", "Replace all occurrences (default: false)"}}}
+                }},
+                {"required", json::array({"path", "old_string", "new_string"})}
+            }}
+        },
+        {
+            {"name", "list_directory"},
+            {"description", "List contents of a directory."},
+            {"input_schema", {
+                {"type", "object"},
+                {"properties", {
+                    {"path", {{"type", "string"}, {"description", "Absolute path to the directory"}}}
+                }},
+                {"required", json::array({"path"})}
+            }}
+        },
+        {
+            {"name", "glob_files"},
+            {"description", "Search for files matching a glob pattern (e.g., **/*.cpp)."},
+            {"input_schema", {
+                {"type", "object"},
+                {"properties", {
+                    {"pattern", {{"type", "string"}, {"description", "Glob pattern to match"}}},
+                    {"path", {{"type", "string"}, {"description", "Base directory to search in"}}}
+                }},
+                {"required", json::array({"pattern"})}
+            }}
+        },
+        {
+            {"name", "grep_files"},
+            {"description", "Search for content matching a regex pattern in files."},
+            {"input_schema", {
+                {"type", "object"},
+                {"properties", {
+                    {"pattern", {{"type", "string"}, {"description", "Regex pattern to search for"}}},
+                    {"path", {{"type", "string"}, {"description", "Directory or file to search in"}}},
+                    {"glob", {{"type", "string"}, {"description", "Optional glob pattern to filter files"}}}
+                }},
+                {"required", json::array({"pattern"})}
+            }}
+        }
+    };
+}
+
+// ============================================================================
+// Tool Execution
+// ============================================================================
+
+bool FileSystemTools::isFileSystemTool(const std::string& toolName) {
+    static const std::vector<std::string> tools = {
+        "read_file", "write_file", "edit_file",
+        "list_directory", "glob_files", "grep_files"
+    };
+    return std::find(tools.begin(), tools.end(), toolName) != tools.end();
+}
+
+json FileSystemTools::execute(const std::string& toolName, const json& params) {
+    try {
+        if (toolName == "read_file") return readFile(params);
+        if (toolName == "write_file") return writeFile(params);
+        if (toolName == "edit_file") return editFile(params);
+        if (toolName == "list_directory") return listDirectory(params);
+        if (toolName == "glob_files") return globFiles(params);
+        if (toolName == "grep_files") return grepFiles(params);
+        return makeError("Unknown tool: " + toolName);
+    } catch (const std::exception& e) {
+        return makeError(std::string("Exception: ") + e.what());
+    }
+}
+
+// ============================================================================
+// Tool Implementations
+// ============================================================================
+
+// Read a text file and return its lines prefixed with 1-based line numbers.
+// "offset" is the 0-based first line; a "limit" <= 0 means "no limit".
+json FileSystemTools::readFile(const json& params) {
+    if (!params.contains("path")) {
+        return makeError("Missing required parameter: path");
+    }
+
+    std::string path = params["path"].get<std::string>();
+    std::string canonPath = canonicalizePath(path);
+
+    if (!isPathAllowed(canonPath)) {
+        return makeError("Path not allowed: " + path);
+    }
+
+    if (isPatternBlocked(canonPath)) {
+        return makeError("File pattern blocked for security: " + path);
+    }
+
+    if (!fs::exists(canonPath)) {
+        return makeError("File not found: " + path);
+    }
+
+    if (!fs::is_regular_file(canonPath)) {
+        return makeError("Not a regular file: " + path);
+    }
+
+    auto fileSize = fs::file_size(canonPath);
+    if (fileSize > s_maxReadSize) {
+        return makeError("File too large: " + std::to_string(fileSize) + " bytes (max: " + std::to_string(s_maxReadSize) + ")");
+    }
+
+    std::ifstream file(canonPath);
+    if (!file.is_open()) {
+        return makeError("Cannot open file: " + path);
+    }
+
+    // A negative offset would shift the reported line numbers, so clamp it
+    const int offset = std::max(0, params.value("offset", 0));
+    const int limit = params.value("limit", -1);
+
+    std::vector<std::string> lines;
+    std::string line;
+    int lineNum = 0;
+
+    // Keep scanning once the limit is reached so total_lines reports the real
+    // length of the file (its size is already capped by s_maxReadSize)
+    while (std::getline(file, line)) {
+        const bool underLimit = limit <= 0 || static_cast<int>(lines.size()) < limit;
+        if (lineNum >= offset && underLimit) {
+            lines.push_back(line);
+        }
+        lineNum++;
+    }
+
+    // Build numbered output like Claude Code
+    std::ostringstream content;
+    for (size_t i = 0; i < lines.size(); i++) {
+        content << (static_cast<size_t>(offset) + i + 1) << "\t" << lines[i] << "\n";
+    }
+
+    return makeSuccess({
+        {"content", content.str()},
+        {"lines_read", lines.size()},
+        {"total_lines", lineNum},
+        {"path", canonPath}
+    });
+}
+
+// Create or overwrite a file, creating missing parent directories first.
+json FileSystemTools::writeFile(const json& params) {
+    if (!params.contains("path") || !params.contains("content")) {
+        return makeError("Missing required parameters: path, content");
+    }
+
+    std::string path = params["path"].get<std::string>();
+    std::string content = params["content"].get<std::string>();
+    std::string canonPath = canonicalizePath(path);
+
+    if (!isPathAllowed(canonPath)) {
+        return makeError("Path not allowed: " + path);
+    }
+
+    if (isPatternBlocked(canonPath)) {
+        return makeError("File pattern blocked for security: " + path);
+    }
+
+    if (content.size() > s_maxWriteSize) {
+        return makeError("Content too large: " + std::to_string(content.size()) + " bytes (max: " + std::to_string(s_maxWriteSize) + ")");
+    }
+
+    // Create parent directories if needed
+    fs::path filePath(canonPath);
+    if (filePath.has_parent_path()) {
+        fs::create_directories(filePath.parent_path());
+    }
+
+    std::ofstream file(canonPath);
+    if (!file.is_open()) {
+        return makeError("Cannot create/open file: " + path);
+    }
+
+    file << content;
+    file.close();
+    // A full disk or I/O error only shows up in the stream state after the flush
+    if (file.fail()) {
+        return makeError("Failed to write file: " + path);
+    }
+
+    return makeSuccess({
+        {"message", "File written successfully"},
+        {"path", canonPath},
+        {"bytes_written", content.size()}
+    });
+}
+
+// Replace an exact string in a file. Without replace_all the match must be unique.
+json FileSystemTools::editFile(const json& params) {
+    if (!params.contains("path") || !params.contains("old_string") || !params.contains("new_string")) {
+        return makeError("Missing required parameters: path, old_string, new_string");
+    }
+
+    std::string path = params["path"].get<std::string>();
+    std::string oldString = params["old_string"].get<std::string>();
+    std::string newString = params["new_string"].get<std::string>();
+    bool replaceAll = params.value("replace_all", false);
+    std::string canonPath = canonicalizePath(path);
+
+    // find("") matches at every position without advancing, so an empty
+    // needle would make the search loops below spin forever
+    if (oldString.empty()) {
+        return makeError("old_string must not be empty");
+    }
+
+    if (!isPathAllowed(canonPath)) {
+        return makeError("Path not allowed: " + path);
+    }
+
+    if (isPatternBlocked(canonPath)) {
+        return makeError("File pattern blocked for security: " + path);
+    }
+
+    if (!fs::exists(canonPath)) {
+        return makeError("File not found: " + path);
+    }
+
+    if (!fs::is_regular_file(canonPath)) {
+        return makeError("Not a regular file: " + path);
+    }
+
+    // Read file content
+    std::ifstream inFile(canonPath);
+    if (!inFile.is_open()) {
+        return makeError("Cannot open file: " + path);
+    }
+
+    std::ostringstream buffer;
+    buffer << inFile.rdbuf();
+    std::string content = buffer.str();
+    inFile.close();
+
+    // Count occurrences
+    size_t count = 0;
+    size_t pos = 0;
+    while ((pos = content.find(oldString, pos)) != std::string::npos) {
+        count++;
+        pos += oldString.length();
+    }
+
+    if (count == 0) {
+        return makeError("String not found in file: " + oldString.substr(0, 50) + (oldString.length() > 50 ? "..." : ""));
+    }
+
+    if (count > 1 && !replaceAll) {
+        return makeError("String found " + std::to_string(count) + " times. Use replace_all=true to replace all, or provide more context to make it unique.");
+    }
+
+    // Replace every occurrence; without replace_all there is exactly one here
+    std::string newContent = content;
+    pos = 0;
+    while ((pos = newContent.find(oldString, pos)) != std::string::npos) {
+        newContent.replace(pos, oldString.length(), newString);
+        pos += newString.length();
+    }
+
+    if (newContent.size() > s_maxWriteSize) {
+        return makeError("Content too large: " + std::to_string(newContent.size()) + " bytes (max: " + std::to_string(s_maxWriteSize) + ")");
+    }
+
+    // Write back
+    std::ofstream outFile(canonPath);
+    if (!outFile.is_open()) {
+        return makeError("Cannot write file: " + path);
+    }
+    outFile << newContent;
+    outFile.close();
+    if (outFile.fail()) {
+        return makeError("Failed to write file: " + path);
+    }
+
+    return makeSuccess({
+        {"message", "File edited successfully"},
+        {"path", canonPath},
+        {"replacements", count}
+    });
+}
+
+// List the entries of a directory with their type and, for files, size.
+json FileSystemTools::listDirectory(const json& params) {
+    if (!params.contains("path")) {
+        return makeError("Missing required parameter: path");
+    }
+
+    std::string path = params["path"].get<std::string>();
+    std::string canonPath = canonicalizePath(path);
+
+    if (!isPathAllowed(canonPath)) {
+        return makeError("Path not allowed: " + path);
+    }
+
+    if (!fs::exists(canonPath)) {
+        return makeError("Directory not found: " + path);
+    }
+
+    if (!fs::is_directory(canonPath)) {
+        return makeError("Not a directory: " + path);
+    }
+
+    json entries = json::array();
+    for (const auto& entry : fs::directory_iterator(canonPath)) {
+        json item;
+        item["name"] = entry.path().filename().string();
+        item["type"] = entry.is_directory() ? "directory" : "file";
+        if (entry.is_regular_file()) {
+            item["size"] = entry.file_size();
+        }
+        entries.push_back(item);
+    }
+
+    return makeSuccess({
+        {"path", canonPath},
+        {"entries", entries},
+        {"count", entries.size()}
+    });
+}
+
+// Find regular files below a base directory whose relative path matches a glob.
+json FileSystemTools::globFiles(const json& params) {
+    if (!params.contains("pattern")) {
+        return makeError("Missing required parameter: pattern");
+    }
+
+    std::string pattern = params["pattern"].get<std::string>();
+    std::string basePath = params.value("path", fs::current_path().string());
+    std::string canonBase = canonicalizePath(basePath);
+
+    if (!isPathAllowed(canonBase)) {
+        return makeError("Path not allowed: " + basePath);
+    }
+
+    if (!fs::exists(canonBase) || !fs::is_directory(canonBase)) {
+        return makeError("Base directory not found: " + basePath);
+    }
+
+    std::regex rx;
+    try {
+        rx = std::regex(globToRegex(pattern), std::regex::icase);
+    } catch (const std::regex_error& e) {
+        return makeError("Invalid glob pattern: " + std::string(e.what()));
+    }
+
+    // Only "**" or an explicit '/' can match below the base directory, so
+    // any other pattern does not need to descend into subdirectories
+    const bool recursive = pattern.find("**") != std::string::npos ||
+                           pattern.find('/') != std::string::npos;
+
+    json matches = json::array();
+    const fs::path baseDir(canonBase);
+    const fs::recursive_directory_iterator end;
+    for (fs::recursive_directory_iterator it(baseDir, fs::directory_options::skip_permission_denied);
+         it != end; ++it) {
+        if (!recursive) {
+            it.disable_recursion_pending();
+        }
+        if (!it->is_regular_file()) {
+            continue;
+        }
+        // generic_string() always uses '/', which is what the regex expects
+        const std::string relPath = it->path().lexically_relative(baseDir).generic_string();
+        if (std::regex_match(relPath, rx)) {
+            matches.push_back(it->path().string());
+            if (matches.size() >= kMaxGlobMatches) break; // Limit results
+        }
+    }
+
+    return makeSuccess({
+        {"pattern", pattern},
+        {"base_path", canonBase},
+        {"matches", matches},
+        {"count", matches.size()}
+    });
+}
+
+// Search file contents line by line with a regex, optionally filtered by a glob.
+json FileSystemTools::grepFiles(const json& params) {
+    if (!params.contains("pattern")) {
+        return makeError("Missing required parameter: pattern");
+    }
+
+    std::string pattern = params["pattern"].get<std::string>();
+    std::string searchPath = params.value("path", fs::current_path().string());
+    std::string glob = params.value("glob", std::string("*"));
+    std::string canonPath = canonicalizePath(searchPath);
+
+    if (!isPathAllowed(canonPath)) {
+        return makeError("Path not allowed: " + searchPath);
+    }
+
+    if (!fs::exists(canonPath)) {
+        return makeError("Path not found: " + searchPath);
+    }
+
+    std::regex rx;
+    try {
+        rx = std::regex(pattern);
+    } catch (const std::regex_error& e) {
+        return makeError("Invalid regex pattern: " + std::string(e.what()));
+    }
+
+    // An empty glob filters nothing, exactly like the default "*"
+    if (glob.empty()) {
+        glob = "*";
+    }
+    std::regex globRx;
+    try {
+        globRx = std::regex(globToRegex(glob), std::regex::icase);
+    } catch (const std::regex_error& e) {
+        return makeError("Invalid glob pattern: " + std::string(e.what()));
+    }
+    // A glob with a '/' is matched against the path relative to the search
+    // root, a plain one (e.g. *.cpp) against the file name only
+    const bool globHasDir = glob.find('/') != std::string::npos;
+
+    json results = json::array();
+    size_t totalMatches = 0;
+
+    auto searchFile = [&](const fs::path& filePath) {
+        if (isPatternBlocked(filePath.string())) return;
+
+        std::ifstream file(filePath);
+        if (!file.is_open()) return;
+
+        std::string line;
+        int lineNum = 0;
+        while (std::getline(file, line)) {
+            lineNum++;
+            if (std::regex_search(line, rx)) {
+                results.push_back({
+                    {"file", filePath.string()},
+                    {"line", lineNum},
+                    {"content", line.substr(0, kMaxGrepLineLength)} // Truncate long lines
+                });
+                totalMatches++;
+                if (totalMatches >= kMaxGrepMatches) return; // Limit results
+            }
+        }
+    };
+
+    if (fs::is_regular_file(canonPath)) {
+        searchFile(canonPath);
+    } else if (fs::is_directory(canonPath)) {
+        const fs::path rootDir(canonPath);
+        for (const auto& entry : fs::recursive_directory_iterator(
+                 rootDir, fs::directory_options::skip_permission_denied)) {
+            if (!entry.is_regular_file()) {
+                continue;
+            }
+            const std::string candidate = globHasDir
+                ? entry.path().lexically_relative(rootDir).generic_string()
+                : entry.path().filename().string();
+            if (std::regex_match(candidate, globRx)) {
+                searchFile(entry.path());
+                if (totalMatches >= kMaxGrepMatches) break;
+            }
+        }
+    }
+
+    return makeSuccess({
+        {"pattern", pattern},
+        {"path", canonPath},
+        {"results", results},
+        {"total_matches", totalMatches}
+    });
+}
+
+// ============================================================================
+// Security Helpers
+// ============================================================================
+
+// Resolve a path to an absolute, normalized form. Symlinks are resolved for
+// the part of the path that already exists, so the allow-list check sees the
+// real location even when the final components have not been created yet.
+std::string FileSystemTools::canonicalizePath(const std::string& path) {
+    std::error_code ec;
+    const fs::path absolutePath = fs::absolute(path, ec);
+    if (ec) {
+        // Nothing sensible to resolve against; normalize what was given
+        return fs::path(path).lexically_normal().string();
+    }
+
+    const fs::path resolved = fs::weakly_canonical(absolutePath, ec);
+    if (ec) {
+        return absolutePath.lexically_normal().string();
+    }
+    return resolved.string();
+}
+
+// Check a canonical path against the allow-list, comparing whole path
+// components so that "/data/app" does not also allow "/data/app-secrets".
+bool FileSystemTools::isPathAllowed(const std::string& path) {
+    // If no allowed paths configured, allow all
+    if (s_allowedPaths.empty()) {
+        return true;
+    }
+
+    const fs::path candidate(path);
+    for (const auto& allowed : s_allowedPaths) {
+        fs::path root(canonicalizePath(allowed));
+        if (root.empty()) {
+            continue; // an empty root would be a prefix of every path
+        }
+        if (!root.has_filename() && root.has_relative_path()) {
+            root = root.parent_path(); // drop a trailing separator
+        }
+        const auto diverge = std::mismatch(root.begin(), root.end(), candidate.begin(), candidate.end());
+        if (diverge.first == root.end()) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Check the file name (not the directory part) against the blocked globs.
+bool FileSystemTools::isPatternBlocked(const std::string& path) {
+    fs::path filePath(path);
+    std::string filename = filePath.filename().string();
+
+    for (const auto& pattern : s_blockedPatterns) {
+        try {
+            // globToRegex escapes '.', so "*.env" does not also match e.g. "myenv"
+            std::regex rx(globToRegex(pattern), std::regex::icase);
+            if (std::regex_match(filename, rx)) {
+                return true;
+            }
+        } catch (const std::regex_error&) {
+            // Invalid pattern, skip
+        }
+    }
+    return false;
+}
+
+// ============================================================================
+// Configuration
+// ============================================================================
+
+void FileSystemTools::setAllowedPaths(const std::vector<std::string>& paths) {
+    s_allowedPaths = paths;
+}
+
+void FileSystemTools::setBlockedPatterns(const std::vector<std::string>& patterns) {
+    s_blockedPatterns = patterns;
+}
+
+void FileSystemTools::setMaxReadSize(size_t bytes) {
+    s_maxReadSize = bytes;
+}
+
+void FileSystemTools::setMaxWriteSize(size_t bytes) {
+    s_maxWriteSize = bytes;
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+json FileSystemTools::makeError(const std::string& message) {
+    return {
+        {"success", false},
+        {"error", message}
+    };
+}
+
+json FileSystemTools::makeSuccess(const json& content) {
+    json result = content;
+    result["success"] = true;
+    return result;
+}
+
+} // namespace aissia::tools
diff --git a/src/shared/tools/FileSystemTools.hpp b/src/shared/tools/FileSystemTools.hpp
new file mode 100644
index 0000000..ca96610
--- /dev/null
+++ b/src/shared/tools/FileSystemTools.hpp
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <nlohmann/json.hpp>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace aissia::tools {
+
+using json = nlohmann::json;
+
+/**
+ * @brief FileSystem tools for agentic LLM (Claude Code style)
+ *
+ * Tools:
+ * - read_file: Read file content with optional offset/limit
+ * - write_file: Write/replace entire file
+ * - edit_file: Replace exact string in file
+ * - list_directory: List directory contents
+ * - glob_files: Search files by pattern
+ * - grep_files: Search content with regex
+ */
+class FileSystemTools {
+public:
+    /**
+     * @brief Get tool definitions for LLM
+     */
+    static std::vector<json> getToolDefinitions();
+
+    /**
+     * @brief Execute a tool by name
+     * @param toolName Name of the tool
+     * @param params Tool parameters
+     * @return Result JSON with success/error
+     */
+    static json execute(const std::string& toolName, const json& params);
+
+    /**
+     * @brief Check if tool name is a FileSystem tool
+     */
+    static bool isFileSystemTool(const std::string& toolName);
+
+    // Configuration (process-wide static state, not synchronized)
+    static void setAllowedPaths(const std::vector<std::string>& paths);
+    static void setBlockedPatterns(const std::vector<std::string>& patterns);
+    static void setMaxReadSize(size_t bytes);
+    static void setMaxWriteSize(size_t bytes);
+
+private:
+    // Tool implementations
+    static json readFile(const json& params);
+    static json writeFile(const json& params);
+    static json editFile(const json& params);
+    static json listDirectory(const json& params);
+    static json globFiles(const json& params);
+    static json grepFiles(const json& params);
+
+    // Security helpers
+    static bool isPathAllowed(const std::string& path);
+    static bool isPatternBlocked(const std::string& path);
+    static std::string canonicalizePath(const std::string& path);
+
+    // Error helpers
+    static json makeError(const std::string& message);
+    static json makeSuccess(const json& content);
+
+    // Configuration (static for simplicity)
+    static std::vector<std::string> s_allowedPaths;
+    static std::vector<std::string> s_blockedPatterns;
+    static size_t s_maxReadSize;
+    static size_t s_maxWriteSize;
+};
+
+} // namespace aissia::tools