feat: Implement FileSystem tools for agentic LLM

Add Claude Code style file manipulation tools:
- read_file: Read file content with line numbers (offset/limit support)
- write_file: Create or overwrite files
- edit_file: Replace exact string in file (unique match or replace_all)
- list_directory: List directory contents with type/size
- glob_files: Search files by pattern (**/*.cpp)
- grep_files: Search content with regex

Features:
- Security: configurable allowed paths, blocked patterns (*.env, *.key)
- Size limits: 1MB read, 10MB write
- Path canonicalization to prevent traversal attacks
- Integrated into LLMService tool registry

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
StillHammer 2025-11-27 14:01:25 +08:00
parent 64d485729b
commit 37b62b55e8
5 changed files with 687 additions and 2 deletions

View File

@ -73,9 +73,10 @@ if(OPENSSL_FOUND)
target_compile_definitions(AissiaLLM PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
endif()
# Tools Library (Internal tools + MCP client)
# Tools Library (Internal tools + FileSystem tools + MCP client)
add_library(AissiaTools STATIC
src/shared/tools/InternalTools.cpp
src/shared/tools/FileSystemTools.cpp
src/shared/mcp/StdioTransport.cpp
src/shared/mcp/MCPClient.cpp
)

View File

@ -83,7 +83,21 @@ void LLMService::initializeTools() {
m_logger->info("Registered {} internal tools", m_internalTools->size());
}
// 2. MCP tools (via external servers)
// 2. FileSystem tools (direct C++ execution)
for (const auto& toolDef : tools::FileSystemTools::getToolDefinitions()) {
std::string toolName = toolDef["name"].get<std::string>();
m_toolRegistry.registerTool(
toolName,
toolDef["description"].get<std::string>(),
toolDef["input_schema"],
[toolName](const nlohmann::json& input) -> nlohmann::json {
return tools::FileSystemTools::execute(toolName, input);
}
);
}
m_logger->info("Registered {} filesystem tools", tools::FileSystemTools::getToolDefinitions().size());
// 3. MCP tools (via external servers)
m_mcpClient = std::make_unique<mcp::MCPClient>();
if (loadMCPConfig("config/mcp.json")) {
int connected = m_mcpClient->connectAll();

View File

@ -4,6 +4,7 @@
#include "../shared/llm/ILLMProvider.hpp"
#include "../shared/llm/ToolRegistry.hpp"
#include "../shared/tools/InternalTools.hpp"
#include "../shared/tools/FileSystemTools.hpp"
#include "../shared/mcp/MCPClient.hpp"
#include <grove/IIO.h>

View File

@ -0,0 +1,595 @@
#include "FileSystemTools.hpp"

#include <algorithm>
#include <filesystem>
#include <fstream>
#include <regex>
#include <sstream>
#include <system_error>
namespace fs = std::filesystem;
namespace aissia::tools {
// Default values for the process-wide configuration. Each one can be replaced
// at runtime through the matching FileSystemTools::set*() function.

// Allow-list of paths; starts out empty.
std::vector<std::string> FileSystemTools::s_allowedPaths{};

// Wildcard patterns for file names the tools refuse to touch.
std::vector<std::string> FileSystemTools::s_blockedPatterns{
    "*.env", "*credentials*", "*.key", "*.pem"
};

// Largest file read_file accepts: 1 MiB.
size_t FileSystemTools::s_maxReadSize = size_t{1} << 20;

// Largest payload write_file accepts: 10 MiB.
size_t FileSystemTools::s_maxWriteSize = size_t{10} << 20;
// ============================================================================
// Tool Definitions
// ============================================================================
std::vector<json> FileSystemTools::getToolDefinitions() {
return {
{
{"name", "read_file"},
{"description", "Read content from a file. Returns file content with optional line offset and limit."},
{"input_schema", {
{"type", "object"},
{"properties", {
{"path", {{"type", "string"}, {"description", "Absolute path to the file"}}},
{"offset", {{"type", "integer"}, {"description", "Line number to start from (0-based)"}}},
{"limit", {{"type", "integer"}, {"description", "Maximum number of lines to read"}}}
}},
{"required", json::array({"path"})}
}}
},
{
{"name", "write_file"},
{"description", "Write content to a file. Creates the file if it doesn't exist, overwrites if it does."},
{"input_schema", {
{"type", "object"},
{"properties", {
{"path", {{"type", "string"}, {"description", "Absolute path to the file"}}},
{"content", {{"type", "string"}, {"description", "Content to write to the file"}}}
}},
{"required", json::array({"path", "content"})}
}}
},
{
{"name", "edit_file"},
{"description", "Replace an exact string in a file. The old_string must be unique in the file."},
{"input_schema", {
{"type", "object"},
{"properties", {
{"path", {{"type", "string"}, {"description", "Absolute path to the file"}}},
{"old_string", {{"type", "string"}, {"description", "The exact string to find and replace"}}},
{"new_string", {{"type", "string"}, {"description", "The string to replace with"}}},
{"replace_all", {{"type", "boolean"}, {"description", "Replace all occurrences (default: false)"}}}
}},
{"required", json::array({"path", "old_string", "new_string"})}
}}
},
{
{"name", "list_directory"},
{"description", "List contents of a directory."},
{"input_schema", {
{"type", "object"},
{"properties", {
{"path", {{"type", "string"}, {"description", "Absolute path to the directory"}}}
}},
{"required", json::array({"path"})}
}}
},
{
{"name", "glob_files"},
{"description", "Search for files matching a glob pattern (e.g., **/*.cpp)."},
{"input_schema", {
{"type", "object"},
{"properties", {
{"pattern", {{"type", "string"}, {"description", "Glob pattern to match"}}},
{"path", {{"type", "string"}, {"description", "Base directory to search in"}}}
}},
{"required", json::array({"pattern"})}
}}
},
{
{"name", "grep_files"},
{"description", "Search for content matching a regex pattern in files."},
{"input_schema", {
{"type", "object"},
{"properties", {
{"pattern", {{"type", "string"}, {"description", "Regex pattern to search for"}}},
{"path", {{"type", "string"}, {"description", "Directory or file to search in"}}},
{"glob", {{"type", "string"}, {"description", "Optional glob pattern to filter files"}}}
}},
{"required", json::array({"pattern"})}
}}
}
};
}
// ============================================================================
// Tool Execution
// ============================================================================
/**
 * @brief Tell whether @p toolName is one of the tools handled by this class.
 * @param toolName Tool name to look up (exact, case-sensitive comparison).
 * @return true when the name is one of the six filesystem tools.
 */
bool FileSystemTools::isFileSystemTool(const std::string& toolName) {
    static const char* const kKnownTools[] = {
        "read_file", "write_file", "edit_file",
        "list_directory", "glob_files", "grep_files"
    };
    for (const char* candidate : kKnownTools) {
        if (toolName == candidate) {
            return true;
        }
    }
    return false;
}
/**
 * @brief Run the tool called @p toolName with the given parameters.
 *
 * Any std::exception escaping a tool implementation is converted into an
 * error result, so callers receive a JSON object instead of an exception.
 *
 * @param toolName Name of the tool to run.
 * @param params   Tool arguments as a JSON object.
 * @return The tool's result, or an error object for unknown names/exceptions.
 */
json FileSystemTools::execute(const std::string& toolName, const json& params) {
    using Handler = json (*)(const json&);
    struct Route {
        const char* name;
        Handler run;
    };
    static const Route kRoutes[] = {
        {"read_file", &FileSystemTools::readFile},
        {"write_file", &FileSystemTools::writeFile},
        {"edit_file", &FileSystemTools::editFile},
        {"list_directory", &FileSystemTools::listDirectory},
        {"glob_files", &FileSystemTools::globFiles},
        {"grep_files", &FileSystemTools::grepFiles},
    };

    try {
        for (const Route& route : kRoutes) {
            if (toolName == route.name) {
                return route.run(params);
            }
        }
        return makeError("Unknown tool: " + toolName);
    } catch (const std::exception& e) {
        return makeError(std::string("Exception: ") + e.what());
    }
}
// ============================================================================
// Tool Implementations
// ============================================================================
/**
 * @brief read_file tool: return a window of a text file with line numbers.
 *
 * Keys read from @p params:
 *  - "path"   (string, required)  file to read.
 *  - "offset" (integer, optional) 0-based index of the first line returned;
 *                                 negative values are clamped to 0.
 *  - "limit"  (integer, optional) maximum number of lines returned; when
 *                                 omitted or <= 0 every remaining line is returned.
 *
 * @return On success an object with "content" (one "<1-based line no>\t<text>\n"
 *         entry per returned line), "lines_read", "total_lines" (number of
 *         lines in the whole file) and "path" (canonical path). Otherwise an
 *         error object describing which check failed.
 */
json FileSystemTools::readFile(const json& params) {
    if (!params.contains("path")) {
        return makeError("Missing required parameter: path");
    }

    const std::string path = params["path"].get<std::string>();
    const std::string canonPath = canonicalizePath(path);

    if (!isPathAllowed(canonPath)) {
        return makeError("Path not allowed: " + path);
    }
    if (isPatternBlocked(canonPath)) {
        return makeError("File pattern blocked for security: " + path);
    }
    if (!fs::exists(canonPath)) {
        return makeError("File not found: " + path);
    }
    if (!fs::is_regular_file(canonPath)) {
        return makeError("Not a regular file: " + path);
    }

    const auto fileSize = fs::file_size(canonPath);
    if (fileSize > s_maxReadSize) {
        return makeError("File too large: " + std::to_string(fileSize) + " bytes (max: " + std::to_string(s_maxReadSize) + ")");
    }

    std::ifstream file(canonPath);
    if (!file.is_open()) {
        return makeError("Cannot open file: " + path);
    }

    // A negative offset would wrap around in the unsigned line-number
    // arithmetic below, so it is treated as "start at the first line".
    const int offset = std::max(0, params.value("offset", 0));
    const int limit = params.value("limit", -1);

    std::vector<std::string> lines;
    std::string line;
    int lineNum = 0;
    while (std::getline(file, line)) {
        // Keep scanning after the window is full so that total_lines really
        // is the length of the file rather than the index where we stopped.
        const bool windowFull = limit > 0 && static_cast<int>(lines.size()) >= limit;
        if (lineNum >= offset && !windowFull) {
            lines.push_back(line);
        }
        ++lineNum;
    }

    // Build numbered output like Claude Code
    std::ostringstream content;
    const size_t firstLineNumber = static_cast<size_t>(offset) + 1;
    for (size_t i = 0; i < lines.size(); ++i) {
        content << (firstLineNumber + i) << "\t" << lines[i] << "\n";
    }

    return makeSuccess({
        {"content", content.str()},
        {"lines_read", lines.size()},
        {"total_lines", lineNum},
        {"path", canonPath}
    });
}
/**
 * @brief write_file tool: create or overwrite a file with the given content.
 *
 * Keys read from @p params:
 *  - "path"    (string, required) destination file.
 *  - "content" (string, required) full new content of the file.
 *
 * Missing parent directories are created. Content larger than the configured
 * maximum write size is rejected before anything touches the disk.
 *
 * @return On success an object with "message", "path" (canonical path) and
 *         "bytes_written" (size of the content string). Otherwise an error
 *         object; a failed write is now reported instead of being ignored.
 */
json FileSystemTools::writeFile(const json& params) {
    if (!params.contains("path") || !params.contains("content")) {
        return makeError("Missing required parameters: path, content");
    }

    const std::string path = params["path"].get<std::string>();
    const std::string content = params["content"].get<std::string>();
    const std::string canonPath = canonicalizePath(path);

    if (!isPathAllowed(canonPath)) {
        return makeError("Path not allowed: " + path);
    }
    if (isPatternBlocked(canonPath)) {
        return makeError("File pattern blocked for security: " + path);
    }
    if (content.size() > s_maxWriteSize) {
        return makeError("Content too large: " + std::to_string(content.size()) + " bytes (max: " + std::to_string(s_maxWriteSize) + ")");
    }

    // Create parent directories if needed
    const fs::path filePath(canonPath);
    if (filePath.has_parent_path()) {
        std::error_code ec;
        fs::create_directories(filePath.parent_path(), ec);
        if (ec) {
            return makeError("Cannot create parent directory for " + path + ": " + ec.message());
        }
    }

    std::ofstream file(canonPath);
    if (!file.is_open()) {
        return makeError("Cannot create/open file: " + path);
    }

    file << content;
    // close() flushes the buffer; a full disk or I/O error only becomes
    // visible in the stream state afterwards.
    file.close();
    if (file.fail()) {
        return makeError("Failed to write file: " + path);
    }

    return makeSuccess({
        {"message", "File written successfully"},
        {"path", canonPath},
        {"bytes_written", content.size()}
    });
}
/**
 * @brief edit_file tool: replace an exact substring inside an existing file.
 *
 * Keys read from @p params:
 *  - "path"        (string, required)  file to modify.
 *  - "old_string"  (string, required)  text to search for; must not be empty.
 *  - "new_string"  (string, required)  replacement text.
 *  - "replace_all" (boolean, optional) when false (default) the edit is only
 *                                      applied if old_string occurs exactly once.
 *
 * Occurrences are counted left to right without overlap.
 *
 * @return On success an object with "message", "path" (canonical path) and
 *         "replacements" (number of occurrences replaced). Otherwise an error
 *         object; the file is left untouched unless the final write fails.
 */
json FileSystemTools::editFile(const json& params) {
    if (!params.contains("path") || !params.contains("old_string") || !params.contains("new_string")) {
        return makeError("Missing required parameters: path, old_string, new_string");
    }

    const std::string path = params["path"].get<std::string>();
    const std::string oldString = params["old_string"].get<std::string>();
    const std::string newString = params["new_string"].get<std::string>();
    const bool replaceAll = params.value("replace_all", false);

    // An empty needle matches at every position without advancing the scan,
    // which previously made the occurrence loop spin forever.
    if (oldString.empty()) {
        return makeError("old_string must not be empty");
    }

    const std::string canonPath = canonicalizePath(path);

    if (!isPathAllowed(canonPath)) {
        return makeError("Path not allowed: " + path);
    }
    if (isPatternBlocked(canonPath)) {
        return makeError("File pattern blocked for security: " + path);
    }
    if (!fs::exists(canonPath)) {
        return makeError("File not found: " + path);
    }
    if (!fs::is_regular_file(canonPath)) {
        return makeError("Not a regular file: " + path);
    }

    // Read file content
    std::ifstream inFile(canonPath);
    if (!inFile.is_open()) {
        return makeError("Cannot open file: " + path);
    }
    std::ostringstream buffer;
    buffer << inFile.rdbuf();
    const std::string content = buffer.str();
    inFile.close();

    // Locate every non-overlapping occurrence once; the positions are reused
    // for both the uniqueness check and the replacement itself.
    std::vector<size_t> hits;
    for (size_t pos = content.find(oldString);
         pos != std::string::npos;
         pos = content.find(oldString, pos + oldString.length())) {
        hits.push_back(pos);
    }

    if (hits.empty()) {
        return makeError("String not found in file: " + oldString.substr(0, 50) + (oldString.length() > 50 ? "..." : ""));
    }
    if (hits.size() > 1 && !replaceAll) {
        return makeError("String found " + std::to_string(hits.size()) + " times. Use replace_all=true to replace all, or provide more context to make it unique.");
    }

    // Stitch the new content together: untouched text between hits plus the
    // replacement at each hit. Without replace_all there is exactly one hit.
    std::string newContent;
    newContent.reserve(content.size());
    size_t cursor = 0;
    for (const size_t hit : hits) {
        newContent.append(content, cursor, hit - cursor);
        newContent += newString;
        cursor = hit + oldString.length();
    }
    newContent.append(content, cursor, std::string::npos);

    // Write back
    std::ofstream outFile(canonPath);
    if (!outFile.is_open()) {
        return makeError("Cannot write file: " + path);
    }
    outFile << newContent;
    // close() flushes; check the stream so a failed write is not reported
    // as a successful edit.
    outFile.close();
    if (outFile.fail()) {
        return makeError("Failed to write file: " + path);
    }

    return makeSuccess({
        {"message", "File edited successfully"},
        {"path", canonPath},
        {"replacements", hits.size()}
    });
}
/**
 * @brief list_directory tool: enumerate the direct children of a directory.
 *
 * Keys read from @p params:
 *  - "path" (string, required) directory to list.
 *
 * @return On success an object with "path" (canonical path), "entries" and
 *         "count". Each entry carries "name" and "type" ("directory" or
 *         "file"); regular files additionally carry "size". Otherwise an
 *         error object describing which check failed.
 */
json FileSystemTools::listDirectory(const json& params) {
    if (!params.contains("path")) {
        return makeError("Missing required parameter: path");
    }

    const std::string requested = params["path"].get<std::string>();
    const std::string resolved = canonicalizePath(requested);

    if (!isPathAllowed(resolved)) {
        return makeError("Path not allowed: " + requested);
    }
    if (!fs::exists(resolved)) {
        return makeError("Directory not found: " + requested);
    }
    if (!fs::is_directory(resolved)) {
        return makeError("Not a directory: " + requested);
    }

    json listing = json::array();
    for (const auto& child : fs::directory_iterator(resolved)) {
        json described;
        described["name"] = child.path().filename().string();
        if (child.is_directory()) {
            described["type"] = "directory";
        } else {
            described["type"] = "file";
        }
        // Only regular files report a size.
        if (child.is_regular_file()) {
            described["size"] = child.file_size();
        }
        listing.push_back(described);
    }

    json payload = json::object();
    payload["path"] = resolved;
    payload["entries"] = listing;
    payload["count"] = listing.size();
    return makeSuccess(payload);
}
/**
 * @brief Translate a glob into an ECMAScript regex for '/'-separated paths.
 *
 * Supported syntax:
 *  - a double star followed by '/' at the start of a path segment matches
 *    zero or more whole directories,
 *  - any other double star matches any run of characters, including '/',
 *  - a single '*' matches any run of characters inside one path segment,
 *  - '?' matches exactly one character inside a path segment.
 * Every other character is matched literally; regex metacharacters are escaped.
 */
static std::string globToRegex(const std::string& glob) {
    static const std::string kRegexMeta = "\\.+^$|()[]{}";

    std::string rx;
    rx.reserve(glob.size() * 2);
    for (size_t i = 0; i < glob.size(); ++i) {
        const char c = glob[i];
        if (c == '*') {
            const bool doubleStar = i + 1 < glob.size() && glob[i + 1] == '*';
            if (!doubleStar) {
                rx += "[^/]*";
                continue;
            }
            const bool atSegmentStart = i == 0 || glob[i - 1] == '/';
            ++i;  // consume the second '*'
            if (atSegmentStart && i + 1 < glob.size() && glob[i + 1] == '/') {
                ++i;  // consume the '/', it is part of the optional group
                rx += "(?:.*/)?";
            } else {
                rx += ".*";
            }
        } else if (c == '?') {
            rx += "[^/]";
        } else {
            if (kRegexMeta.find(c) != std::string::npos) {
                rx += '\\';
            }
            rx += c;
        }
    }
    return rx;
}

/**
 * @brief glob_files tool: list regular files below a directory whose path
 *        relative to that directory matches a glob pattern.
 *
 * Keys read from @p params:
 *  - "pattern" (string, required) glob, matched case-insensitively against the
 *                                 '/'-separated path relative to the base.
 *  - "path"    (string, optional) base directory; defaults to the current
 *                                 working directory.
 *
 * The tree is always walked recursively; a pattern without a double star
 * simply cannot match entries in deeper directories than it spells out.
 * At most 100 matches are returned.
 *
 * @return On success an object with "pattern", "base_path", "matches" (full
 *         paths) and "count". Otherwise an error object.
 */
json FileSystemTools::globFiles(const json& params) {
    if (!params.contains("pattern")) {
        return makeError("Missing required parameter: pattern");
    }

    const std::string pattern = params["pattern"].get<std::string>();
    const std::string basePath = params.value("path", fs::current_path().string());
    const std::string canonBase = canonicalizePath(basePath);

    if (!isPathAllowed(canonBase)) {
        return makeError("Path not allowed: " + basePath);
    }
    if (!fs::exists(canonBase) || !fs::is_directory(canonBase)) {
        return makeError("Base directory not found: " + basePath);
    }

    const std::regex rx(globToRegex(pattern), std::regex::icase);
    constexpr size_t kMaxMatches = 100;

    json matches = json::array();
    const fs::path base(canonBase);
    // Unreadable sub-directories are skipped instead of aborting the search.
    for (const auto& entry : fs::recursive_directory_iterator(
             base, fs::directory_options::skip_permission_denied)) {
        std::error_code ec;
        if (!entry.is_regular_file(ec) || ec) {
            continue;
        }
        // Entries are produced as base/<sub path>, so a lexical relative path
        // is exact; generic_string() yields '/' separators on every platform,
        // which is what the generated regex expects.
        const std::string relPath = entry.path().lexically_relative(base).generic_string();
        if (std::regex_match(relPath, rx)) {
            matches.push_back(entry.path().string());
            if (matches.size() >= kMaxMatches) {
                break;  // Limit results
            }
        }
    }

    return makeSuccess({
        {"pattern", pattern},
        {"base_path", canonBase},
        {"matches", matches},
        {"count", matches.size()}
    });
}
/**
 * @brief Case-sensitive wildcard match of a whole string.
 *
 * '*' matches any run of characters (including none) and '?' matches exactly
 * one character; everything else must match literally.
 */
static bool matchesWildcard(const std::string& pattern, const std::string& text) {
    size_t p = 0;
    size_t t = 0;
    size_t starInPattern = std::string::npos;  // position of the last '*' seen
    size_t starInText = 0;                     // text position that '*' currently covers up to

    while (t < text.size()) {
        if (p < pattern.size() && pattern[p] == '*') {
            // Tentatively let '*' match nothing; widen on later mismatch.
            starInPattern = p++;
            starInText = t;
        } else if (p < pattern.size() && (pattern[p] == '?' || pattern[p] == text[t])) {
            ++p;
            ++t;
        } else if (starInPattern != std::string::npos) {
            // Backtrack: let the last '*' swallow one more character.
            p = starInPattern + 1;
            t = ++starInText;
        } else {
            return false;
        }
    }
    while (p < pattern.size() && pattern[p] == '*') {
        ++p;
    }
    return p == pattern.size();
}

/**
 * @brief grep_files tool: search file contents line by line with a regex.
 *
 * Keys read from @p params:
 *  - "pattern" (string, required) regular expression (std::regex default
 *                                 grammar), searched within each line.
 *  - "path"    (string, optional) file or directory to search; defaults to
 *                                 the current working directory.
 *  - "glob"    (string, optional) wildcard ('*', '?') that a file's *name*
 *                                 must match when searching a directory;
 *                                 defaults to "*". An empty value is treated
 *                                 as "*".
 *
 * Directories are searched recursively. Files whose name matches a blocked
 * pattern are skipped. The search stops after 50 matching lines, and each
 * reported line is truncated to 200 characters.
 *
 * @return On success an object with "pattern", "path" (canonical path),
 *         "results" (objects with "file", "line", "content") and
 *         "total_matches". Otherwise an error object.
 */
json FileSystemTools::grepFiles(const json& params) {
    if (!params.contains("pattern")) {
        return makeError("Missing required parameter: pattern");
    }

    const std::string pattern = params["pattern"].get<std::string>();
    const std::string searchPath = params.value("path", fs::current_path().string());
    std::string glob = params.value("glob", "*");
    if (glob.empty()) {
        glob = "*";
    }
    const std::string canonPath = canonicalizePath(searchPath);

    if (!isPathAllowed(canonPath)) {
        return makeError("Path not allowed: " + searchPath);
    }
    // Report a missing target instead of returning an empty, "successful" search.
    if (!fs::exists(canonPath)) {
        return makeError("Path not found: " + searchPath);
    }

    std::regex rx;
    try {
        rx = std::regex(pattern);
    } catch (const std::regex_error& e) {
        return makeError("Invalid regex pattern: " + std::string(e.what()));
    }

    constexpr size_t kMaxMatches = 50;
    constexpr size_t kMaxLineLength = 200;

    json results = json::array();
    size_t totalMatches = 0;

    auto searchFile = [&](const fs::path& filePath) {
        if (isPatternBlocked(filePath.string())) return;

        std::ifstream file(filePath);
        if (!file.is_open()) return;

        std::string line;
        int lineNum = 0;
        while (std::getline(file, line)) {
            ++lineNum;
            if (std::regex_search(line, rx)) {
                results.push_back({
                    {"file", filePath.string()},
                    {"line", lineNum},
                    {"content", line.substr(0, kMaxLineLength)}  // Truncate long lines
                });
                ++totalMatches;
                if (totalMatches >= kMaxMatches) return;  // Limit results
            }
        }
    };

    if (fs::is_regular_file(canonPath)) {
        searchFile(canonPath);
    } else if (fs::is_directory(canonPath)) {
        // Unreadable sub-directories are skipped instead of aborting the search.
        for (const auto& entry : fs::recursive_directory_iterator(
                 canonPath, fs::directory_options::skip_permission_denied)) {
            std::error_code ec;
            if (!entry.is_regular_file(ec) || ec) {
                continue;
            }
            if (matchesWildcard(glob, entry.path().filename().string())) {
                searchFile(entry.path());
                if (totalMatches >= kMaxMatches) break;
            }
        }
    }

    return makeSuccess({
        {"pattern", pattern},
        {"path", canonPath},
        {"results", results},
        {"total_matches", totalMatches}
    });
}
// ============================================================================
// Security Helpers
// ============================================================================
std::string FileSystemTools::canonicalizePath(const std::string& path) {
try {
if (fs::exists(path)) {
return fs::canonical(path).string();
}
// For non-existent paths, normalize as much as possible
return fs::absolute(path).lexically_normal().string();
} catch (...) {
return fs::absolute(path).string();
}
}
bool FileSystemTools::isPathAllowed(const std::string& path) {
// If no allowed paths configured, allow all
if (s_allowedPaths.empty()) {
return true;
}
for (const auto& allowed : s_allowedPaths) {
std::string canonAllowed = canonicalizePath(allowed);
if (path.find(canonAllowed) == 0) {
return true;
}
}
return false;
}
/**
 * @brief Check whether the file name of @p path matches a blocked pattern.
 *
 * Only the last path component is tested. Patterns are wildcards matched
 * case-insensitively against the whole file name: '*' matches any run of
 * characters and '?' matches a single character. All other characters are
 * literal, so "*.key" blocks "id.key" but not "monkey".
 *
 * @param path Path whose file name is tested.
 * @return true if any configured pattern matches the file name.
 */
bool FileSystemTools::isPatternBlocked(const std::string& path) {
    static const std::string kRegexMeta = "\\.+^$|()[]{}";

    const std::string filename = fs::path(path).filename().string();

    for (const auto& pattern : s_blockedPatterns) {
        // Translate the wildcard into a regex, escaping everything that
        // would otherwise be interpreted as regex syntax.
        std::string regexPattern;
        regexPattern.reserve(pattern.size() * 2);
        for (const char c : pattern) {
            if (c == '*') {
                regexPattern += ".*";
            } else if (c == '?') {
                regexPattern += '.';
            } else {
                if (kRegexMeta.find(c) != std::string::npos) {
                    regexPattern += '\\';
                }
                regexPattern += c;
            }
        }

        try {
            const std::regex rx(regexPattern, std::regex::icase);
            if (std::regex_match(filename, rx)) {
                return true;
            }
        } catch (const std::regex_error&) {
            // A pattern that still fails to compile cannot match; skip it.
        }
    }
    return false;
}
// ============================================================================
// Configuration
// ============================================================================
/// Replace the allow-list of paths with a copy of @p paths.
void FileSystemTools::setAllowedPaths(const std::vector<std::string>& paths) {
    std::vector<std::string> replacement(paths);
    s_allowedPaths.swap(replacement);
}

/// Replace the blocked file-name patterns with a copy of @p patterns.
void FileSystemTools::setBlockedPatterns(const std::vector<std::string>& patterns) {
    std::vector<std::string> replacement(patterns);
    s_blockedPatterns.swap(replacement);
}

/// Set the largest file size, in bytes, that read_file accepts.
void FileSystemTools::setMaxReadSize(size_t bytes) {
    s_maxReadSize = bytes;
}

/// Set the largest content size, in bytes, that write_file accepts.
void FileSystemTools::setMaxWriteSize(size_t bytes) {
    s_maxWriteSize = bytes;
}
// ============================================================================
// Helpers
// ============================================================================
/**
 * @brief Build the error result shared by all tools.
 * @param message Human-readable description of the failure.
 * @return Object with "success" set to false and "error" set to @p message.
 */
json FileSystemTools::makeError(const std::string& message) {
    json failure = json::object();
    failure["success"] = false;
    failure["error"] = message;
    return failure;
}
/**
 * @brief Build a success result from a tool's payload.
 * @param content Payload fields produced by the tool.
 * @return A copy of @p content with "success" set to true.
 */
json FileSystemTools::makeSuccess(const json& content) {
    json payload(content);
    payload["success"] = true;
    return payload;
}
} // namespace aissia::tools

View File

@ -0,0 +1,74 @@
#pragma once
#include <nlohmann/json.hpp>
#include <string>
#include <vector>
#include <optional>
namespace aissia::tools {
using json = nlohmann::json;
/**
 * @brief Filesystem tools exposed to an agentic LLM (Claude Code style).
 *
 * The class only has static members; configuration is shared process-wide.
 *
 * Available tools:
 *   read_file       read a file, optionally limited by line offset/limit
 *   write_file      create a file or replace its whole content
 *   edit_file       replace an exact string inside a file
 *   list_directory  list the entries of a directory
 *   glob_files      find files by glob pattern
 *   grep_files      search file contents with a regex
 */
class FileSystemTools {
public:
    // ---- Tool discovery and dispatch ---------------------------------------

    /**
     * @brief Describe every tool for the LLM.
     * @return One JSON object per tool with "name", "description" and
     *         "input_schema".
     */
    static std::vector<json> getToolDefinitions();

    /**
     * @brief Tell whether @p toolName names one of the tools listed above.
     */
    static bool isFileSystemTool(const std::string& toolName);

    /**
     * @brief Run a tool.
     * @param toolName Name of the tool to run
     * @param params   Arguments for the tool, as a JSON object
     * @return JSON result whose "success" flag tells success from error;
     *         errors carry an "error" message
     */
    static json execute(const std::string& toolName, const json& params);

    // ---- Configuration ------------------------------------------------------

    /// Restrict the tools to these paths; an empty list allows every path.
    static void setAllowedPaths(const std::vector<std::string>& paths);

    /// Wildcard patterns for file names the tools must refuse.
    static void setBlockedPatterns(const std::vector<std::string>& patterns);

    /// Largest file, in bytes, that read_file accepts.
    static void setMaxReadSize(size_t bytes);

    /// Largest content, in bytes, that write_file accepts.
    static void setMaxWriteSize(size_t bytes);

private:
    // ---- One implementation per tool ---------------------------------------
    static json readFile(const json& params);
    static json writeFile(const json& params);
    static json editFile(const json& params);
    static json listDirectory(const json& params);
    static json globFiles(const json& params);
    static json grepFiles(const json& params);

    // ---- Security helpers ---------------------------------------------------

    /// Resolve a caller-supplied path to an absolute form used by the checks.
    static std::string canonicalizePath(const std::string& path);

    /// True when the path passes the allow-list.
    static bool isPathAllowed(const std::string& path);

    /// True when the path's file name matches a blocked pattern.
    static bool isPatternBlocked(const std::string& path);

    // ---- Result helpers -----------------------------------------------------

    /// Result object flagged as successful, carrying the fields of @p content.
    static json makeSuccess(const json& content);

    /// Result object flagged as failed, carrying @p message.
    static json makeError(const std::string& message);

    // ---- Configuration storage (static for simplicity) ----------------------
    static std::vector<std::string> s_allowedPaths;
    static std::vector<std::string> s_blockedPatterns;
    static size_t s_maxReadSize;
    static size_t s_maxWriteSize;
};
} // namespace aissia::tools