From d519bf77e9e6f38af69f6fa7ae863b172a6b3301d905fa3b37c631f5e898ac5e Mon Sep 17 00:00:00 2001 From: Harshavardhan Musanalli Date: Sun, 16 Nov 2025 10:29:24 +0100 Subject: [PATCH] working mode --- .gitignore | 3 + Makefile | 67 +++-- README.md | 246 ++++++++++++++----- agent.go | 22 +- docs/INSTALLATION.md | 334 +++++++++++++++++++++++++ install.sh | 403 +++++++++++++++++++++++++++++++ internal/config/config.go | 36 ++- main.go | 81 +++++-- scripts/debug_trace_script.sh | 19 -- scripts/demo_ebpf_integration.sh | 141 ----------- scripts/discover-functions.sh | 51 ---- scripts/ebpf_helper.sh | 296 ----------------------- scripts/install.sh | 85 ------- scripts/integration-tests.sh | 116 --------- 14 files changed, 1070 insertions(+), 830 deletions(-) create mode 100644 docs/INSTALLATION.md create mode 100755 install.sh delete mode 100755 scripts/debug_trace_script.sh delete mode 100755 scripts/demo_ebpf_integration.sh delete mode 100755 scripts/discover-functions.sh delete mode 100755 scripts/ebpf_helper.sh delete mode 100755 scripts/install.sh delete mode 100755 scripts/integration-tests.sh diff --git a/.gitignore b/.gitignore index 3f90785..e299fc8 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,6 @@ go.work.sum nannyagent* nanny-agent* .vscode + +# Build directory +build/ diff --git a/Makefile b/Makefile index d7b060e..05047de 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,21 @@ -.PHONY: build run clean test install +.PHONY: build run clean test install build-prod build-release install-system fmt lint help + +VERSION := 0.0.1 +BUILD_DIR := ./build +BINARY_NAME := nannyagent # Build the application build: - go build -o nanny-agent . + go build -o $(BINARY_NAME) . 
# Run the application run: build - ./nanny-agent + ./$(BINARY_NAME) # Clean build artifacts clean: - rm -f nanny-agent + rm -f $(BINARY_NAME) + rm -rf $(BUILD_DIR) # Run tests test: @@ -21,14 +26,34 @@ install: go mod tidy go mod download -# Build for production with optimizations +# Build for production with optimizations (current architecture) build-prod: - CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-w -s' -o nanny-agent . + CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo \ + -ldflags '-w -s -X main.Version=$(VERSION)' \ + -o $(BINARY_NAME) . + +# Build release binaries for both architectures +build-release: clean + @echo "Building release binaries for version $(VERSION)..." + @mkdir -p $(BUILD_DIR) + @echo "Building for linux/amd64..." + @CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo \ + -ldflags '-w -s -X main.Version=$(VERSION)' \ + -o $(BUILD_DIR)/$(BINARY_NAME)-linux-amd64 . + @echo "Building for linux/arm64..." + @CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -a -installsuffix cgo \ + -ldflags '-w -s -X main.Version=$(VERSION)' \ + -o $(BUILD_DIR)/$(BINARY_NAME)-linux-arm64 . + @echo "Generating checksums..." + @cd $(BUILD_DIR) && sha256sum $(BINARY_NAME)-linux-amd64 > $(BINARY_NAME)-linux-amd64.sha256 + @cd $(BUILD_DIR) && sha256sum $(BINARY_NAME)-linux-arm64 > $(BINARY_NAME)-linux-arm64.sha256 + @echo "Build complete! 
Artifacts in $(BUILD_DIR)/" + @ls -lh $(BUILD_DIR)/ # Install system-wide (requires sudo) install-system: build-prod - sudo cp nanny-agent /usr/local/bin/ - sudo chmod +x /usr/local/bin/nanny-agent + sudo cp $(BINARY_NAME) /usr/local/bin/ + sudo chmod +x /usr/local/bin/$(BINARY_NAME) # Format code fmt: @@ -40,14 +65,18 @@ lint: # Show help help: - @echo "Available commands:" - @echo " build - Build the application" - @echo " run - Build and run the application" - @echo " clean - Clean build artifacts" - @echo " test - Run tests" - @echo " install - Install dependencies" - @echo " build-prod - Build for production" - @echo " install-system- Install system-wide (requires sudo)" - @echo " fmt - Format code" - @echo " lint - Run linter" - @echo " help - Show this help" + @echo "NannyAgent Makefile - Available commands:" + @echo "" + @echo " make build - Build the application for current platform" + @echo " make run - Build and run the application" + @echo " make clean - Clean build artifacts" + @echo " make test - Run tests" + @echo " make install - Install Go dependencies" + @echo " make build-prod - Build for production (optimized, current arch)" + @echo " make build-release - Build release binaries for amd64 and arm64" + @echo " make install-system - Install system-wide (requires sudo)" + @echo " make fmt - Format code" + @echo " make lint - Run linter" + @echo " make help - Show this help" + @echo "" + @echo "Version: $(VERSION)" diff --git a/README.md b/README.md index 083cd50..8bebb7d 100644 --- a/README.md +++ b/README.md @@ -1,96 +1,135 @@ -# Linux Diagnostic Agent +# NannyAgent - Linux Diagnostic Agent -A Go-based AI agent that diagnoses Linux system issues using the NannyAPI gateway with OpenAI-compatible SDK. +A Go-based AI agent that diagnoses Linux system issues using eBPF-powered deep monitoring and TensorZero AI integration. 
## Features -- Interactive command-line interface for submitting system issues -- **Automatic system information gathering** - Includes OS, kernel, CPU, memory, network info -- **eBPF-powered deep system monitoring** - Advanced tracing for network, processes, files, and security events -- Integrates with NannyAPI using OpenAI-compatible Go SDK -- Executes diagnostic commands safely and collects output -- Provides step-by-step resolution plans -- **Comprehensive integration tests** with realistic Linux problem scenarios +- 🤖 **AI-Powered Diagnostics** - Intelligent issue analysis and resolution planning +- 🔍 **eBPF Deep Monitoring** - Real-time kernel-level tracing for network, processes, files, and security events +- 🛡️ **Safe Command Execution** - Validates and executes diagnostic commands with timeouts +- 📊 **Automatic System Information Gathering** - Comprehensive OS, kernel, CPU, memory, and network metrics +- 🔄 **WebSocket Integration** - Real-time communication with backend investigation system +- 🔐 **OAuth Device Flow Authentication** - Secure agent registration and authentication +- ✅ **Comprehensive Integration Tests** - Realistic Linux problem scenarios -## Setup +## Requirements -1. Clone this repository -2. 
Copy `.env.example` to `.env` and configure your NannyAPI endpoint: +- **Operating System**: Linux only (no containers/LXC support) +- **Architecture**: amd64 (x86_64) or arm64 (aarch64) +- **Kernel Version**: Linux kernel 5.x or higher +- **Privileges**: Root/sudo access required for eBPF functionality +- **Dependencies**: bpftrace and bpfcc-tools (automatically installed by installer) +- **Network**: Connectivity to Supabase backend + +## Quick Installation + +### One-Line Install (Recommended) + +```bash +# Download and run the installer +curl -fsSL https://your-domain.com/install.sh | sudo bash +``` + +Or download first, then install: + +```bash +# Download the installer +wget https://your-domain.com/install.sh + +# Make it executable +chmod +x install.sh + +# Run the installer +sudo ./install.sh +``` + +### Manual Installation + +1. Clone this repository: ```bash - cp .env.example .env + git clone https://github.com/yourusername/nannyagent.git + cd nannyagent ``` -3. Install dependencies: + +2. Run the installer script: ```bash - go mod tidy - ``` -4. 
Build and run: - ```bash - make build - ./nanny-agent + sudo ./install.sh ``` +The installer will: +- ✅ Verify system requirements (OS, architecture, kernel version) +- ✅ Check for existing installations +- ✅ Install eBPF tools (bpftrace, bpfcc-tools) +- ✅ Build the nannyagent binary +- ✅ Test connectivity to Supabase +- ✅ Install to `/usr/local/bin/nannyagent` +- ✅ Create configuration in `/etc/nannyagent/config.env` +- ✅ Create secure data directory `/var/lib/nannyagent` + ## Configuration -The agent can be configured using environment variables: +After installation, configure your Supabase URL: -- `NANNYAPI_ENDPOINT`: The NannyAPI endpoint (default: `http://tensorzero.netcup.internal:3000/openai/v1`) -- `NANNYAPI_MODEL`: The model identifier (default: `nannyapi::function_name::diagnose_and_heal`) +```bash +# Edit the configuration file +sudo nano /etc/nannyagent/config.env +``` -## Installation on Linux VM +Required configuration: -### Direct Installation +```bash +# Supabase Configuration +SUPABASE_PROJECT_URL=https://your-project.supabase.co -1. **Install Go** (if not already installed): - ```bash - # For Ubuntu/Debian - sudo apt update - sudo apt install golang-go +# Optional Configuration +TOKEN_PATH=/var/lib/nannyagent/token.json +DEBUG=false +``` - # For RHEL/CentOS/Fedora - sudo dnf install golang - # or - sudo yum install golang - ``` +## Command-Line Options -2. **Clone and build the agent**: - ```bash - git clone - cd nannyagentv2 - go mod tidy - make build - ``` +```bash +# Show version (no sudo required) +nannyagent --version +nannyagent -v -3. **Install as system service** (optional): - ```bash - sudo cp nanny-agent /usr/local/bin/ - sudo chmod +x /usr/local/bin/nanny-agent - ``` +# Show help (no sudo required) +nannyagent --help +nannyagent -h -4. 
**Set environment variables**: - ```bash - export NANNYAPI_ENDPOINT="http://your-nannyapi-endpoint:3000/openai/v1" - export NANNYAPI_MODEL="your-model-identifier" - ``` +# Run the agent (requires sudo) +sudo nannyagent +``` ## Usage -1. Start the agent: +1. **First-time Setup** - Authenticate the agent: ```bash - ./nanny-agent + sudo nannyagent ``` + + The agent will display a verification URL and code. Visit the URL and enter the code to authorize the agent. -2. Enter a system issue description when prompted: +2. **Interactive Diagnostics** - After authentication, enter system issues: ``` > On /var filesystem I cannot create any file but df -h shows 30% free space available. ``` -3. The agent will: - - Send the issue to the AI via NannyAPI using OpenAI SDK - - Execute diagnostic commands as suggested by the AI - - Provide command outputs back to the AI - - Display the final diagnosis and resolution plan +3. **The agent will**: + - Gather comprehensive system information automatically + - Send the issue to AI for analysis via TensorZero + - Execute diagnostic commands safely + - Run eBPF traces for deep kernel-level monitoring + - Provide AI-generated root cause analysis and resolution plan -4. Type `quit` or `exit` to stop the agent +4. 
**Exit the agent**: + ``` + > quit + ``` + or + ``` + > exit + ``` ## How It Works @@ -119,14 +158,87 @@ The agent includes comprehensive integration tests that simulate realistic Linux ### Run Integration Tests: ```bash -# Interactive test scenarios -./test-examples.sh +# Run unit tests +make test -# Automated integration tests -./integration-tests.sh +# Run integration tests +./tests/test_ebpf_integration.sh +``` -# Function discovery (find valid NannyAPI functions) -./discover-functions.sh +## Installation Exit Codes + +The installer uses specific exit codes for different failure scenarios: + +| Exit Code | Description | +|-----------|-------------| +| 0 | Success | +| 1 | Not running as root | +| 2 | Unsupported operating system (non-Linux) | +| 3 | Unsupported architecture (not amd64/arm64) | +| 4 | Container/LXC environment detected | +| 5 | Kernel version < 5.x | +| 6 | Existing installation detected | +| 7 | eBPF tools installation failed | +| 8 | Go not installed | +| 9 | Binary build failed | +| 10 | Directory creation failed | +| 11 | Binary installation failed | + +## Troubleshooting + +### Installation Issues + +**Error: "Kernel version X.X is not supported"** +- NannyAgent requires Linux kernel 5.x or higher +- Upgrade your kernel or use a different system + +**Error: "Another instance may already be installed"** +- Check if `/var/lib/nannyagent` exists +- Remove it if you're sure: `sudo rm -rf /var/lib/nannyagent` +- Then retry installation + +**Warning: "Cannot connect to Supabase"** +- Check your network connectivity +- Verify firewall settings allow HTTPS connections +- Ensure SUPABASE_PROJECT_URL is correctly configured in `/etc/nannyagent/config.env` + +### Runtime Issues + +**Error: "This program must be run as root"** +- eBPF requires root privileges +- Always run with: `sudo nannyagent` + +**Error: "Cannot determine kernel version"** +- Ensure `uname` command is available +- Check system integrity + +## Development + +### Building from Source 
+ +```bash +# Clone repository +git clone https://github.com/yourusername/nannyagent.git +cd nannyagent + +# Install Go dependencies +go mod tidy + +# Build binary +make build + +# Run locally (requires sudo) +sudo ./nannyagent +``` + +### Running Tests + +```bash +# Run unit tests +make test + +# Test eBPF capabilities +./tests/test_ebpf_integration.sh ``` ## Safety diff --git a/agent.go b/agent.go index 2c7fd78..0eb9952 100644 --- a/agent.go +++ b/agent.go @@ -54,18 +54,13 @@ func NewLinuxDiagnosticAgent() *LinuxDiagnosticAgent { supabaseURL := os.Getenv("SUPABASE_PROJECT_URL") if supabaseURL == "" { logging.Warning("SUPABASE_PROJECT_URL not set, TensorZero integration will not work") - supabaseURL = "https://gpqzsricripnvbrpsyws.supabase.co" // fallback } - model := os.Getenv("NANNYAPI_MODEL") - if model == "" { - model = "tensorzero::function_name::diagnose_and_heal" - logging.Warning("Using default model '%s'. Set NANNYAPI_MODEL environment variable for your specific function", model) - } + // Default model for diagnostic and healing + model := "tensorzero::function_name::diagnose_and_heal" - // Note: We don't use the OpenAI client anymore, we use direct HTTP to Supabase proxy agent := &LinuxDiagnosticAgent{ - client: nil, // Not used anymore + client: nil, // Not used - we use direct HTTP to Supabase proxy model: model, executor: executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands config: DefaultAgentConfig(), // Default concurrent execution config @@ -84,18 +79,13 @@ func NewLinuxDiagnosticAgentWithAuth(authManager interface{}) *LinuxDiagnosticAg supabaseURL := os.Getenv("SUPABASE_PROJECT_URL") if supabaseURL == "" { logging.Warning("SUPABASE_PROJECT_URL not set, TensorZero integration will not work") - supabaseURL = "https://gpqzsricripnvbrpsyws.supabase.co" // fallback } - model := os.Getenv("NANNYAPI_MODEL") - if model == "" { - model = "tensorzero::function_name::diagnose_and_heal" - logging.Warning("Using default model '%s'. 
Set NANNYAPI_MODEL environment variable for your specific function", model) - } + // Default model for diagnostic and healing + model := "tensorzero::function_name::diagnose_and_heal" - // Note: We don't use the OpenAI client anymore, we use direct HTTP to Supabase proxy agent := &LinuxDiagnosticAgent{ - client: nil, // Not used anymore + client: nil, // Not used - we use direct HTTP to Supabase proxy model: model, executor: executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands config: DefaultAgentConfig(), // Default concurrent execution config diff --git a/docs/INSTALLATION.md b/docs/INSTALLATION.md new file mode 100644 index 0000000..dbb02aa --- /dev/null +++ b/docs/INSTALLATION.md @@ -0,0 +1,334 @@ +# NannyAgent Installation Guide + +## Quick Install + +### One-Line Install (Recommended) + +After uploading `install.sh` to your website: + +```bash +curl -fsSL https://your-domain.com/install.sh | sudo bash +``` + +Or with wget: + +```bash +wget -qO- https://your-domain.com/install.sh | sudo bash +``` + +### Two-Step Install (More Secure) + +Download and inspect the installer first: + +```bash +# Download the installer +curl -fsSL https://your-domain.com/install.sh -o install.sh + +# Inspect the script (recommended!) 
+less install.sh + +# Make it executable +chmod +x install.sh + +# Run the installer +sudo ./install.sh +``` + +## Installation from GitHub + +If you're hosting on GitHub: + +```bash +curl -fsSL https://raw.githubusercontent.com/yourusername/nannyagent/main/install.sh | sudo bash +``` + +## System Requirements + +Before installing, ensure your system meets these requirements: + +### Operating System +- ✅ Linux (any distribution) +- ❌ Windows (not supported) +- ❌ macOS (not supported) +- ❌ Containers/Docker (not supported) +- ❌ LXC (not supported) + +### Architecture +- ✅ amd64 (x86_64) +- ✅ arm64 (aarch64) +- ❌ i386/i686 (32-bit not supported) +- ❌ Other architectures (not supported) + +### Kernel Version +- ✅ Linux kernel 5.x or higher +- ❌ Linux kernel 4.x or lower (not supported) + +Check your kernel version: +```bash +uname -r +# Should show 5.x.x or higher +``` + +### Privileges +- Must have root/sudo access +- Will create system directories: + - `/usr/local/bin/nannyagent` (binary) + - `/etc/nannyagent` (configuration) + - `/var/lib/nannyagent` (data directory) + +### Network +- Connectivity to Supabase backend required +- HTTPS access to your Supabase project URL +- No proxy support at this time + +## What the Installer Does + +The installer performs these steps automatically: + +1. ✅ **System Checks** + - Verifies root privileges + - Detects OS and architecture + - Checks kernel version (5.x+) + - Detects container environments + - Checks for existing installations + +2. ✅ **Dependency Installation** + - Installs `bpftrace` (eBPF tracing tool) + - Installs `bpfcc-tools` (BCC toolkit) + - Installs kernel headers if needed + - Uses your system's package manager (apt/dnf/yum) + +3. ✅ **Build & Install** + - Verifies Go installation (required for building) + - Compiles the nannyagent binary + - Tests connectivity to Supabase + - Installs binary to `/usr/local/bin` + +4. 
✅ **Configuration** + - Creates `/etc/nannyagent/config.env` + - Creates `/var/lib/nannyagent` data directory + - Sets proper permissions (secure) + - Creates installation lock file + +## Installation Exit Codes + +The installer exits with specific codes for different scenarios: + +| Exit Code | Meaning | Resolution | +|-----------|---------|------------| +| 0 | Success | Installation completed | +| 1 | Not root | Run with `sudo` | +| 2 | Unsupported OS | Use Linux | +| 3 | Unsupported architecture | Use amd64 or arm64 | +| 4 | Container detected | Install on bare metal or VM | +| 5 | Kernel too old | Upgrade to kernel 5.x+ | +| 6 | Existing installation | Remove `/var/lib/nannyagent` first | +| 7 | eBPF tools failed | Check package manager and repos | +| 8 | Go not installed | Install Go from golang.org | +| 9 | Build failed | Check Go installation and dependencies | +| 10 | Directory creation failed | Check permissions | +| 11 | Binary installation failed | Check disk space and permissions | + +## Post-Installation + +After successful installation: + +### 1. Configure Supabase URL + +Edit the configuration file: +```bash +sudo nano /etc/nannyagent/config.env +``` + +Set your Supabase project URL: +```bash +SUPABASE_PROJECT_URL=https://your-project.supabase.co +TOKEN_PATH=/var/lib/nannyagent/token.json +DEBUG=false +``` + +### 2. Test the Installation + +Check version (no sudo needed): +```bash +nannyagent --version +``` + +Show help (no sudo needed): +```bash +nannyagent --help +``` + +### 3. 
Run the Agent + +Start the agent (requires sudo): +```bash +sudo nannyagent +``` + +On first run, you'll see authentication instructions: +``` +Visit: https://your-app.com/device-auth +Enter code: ABCD-1234 +``` + +## Uninstallation + +To remove NannyAgent: + +```bash +# Remove binary +sudo rm /usr/local/bin/nannyagent + +# Remove configuration +sudo rm -rf /etc/nannyagent + +# Remove data directory (includes authentication tokens) +sudo rm -rf /var/lib/nannyagent +``` + +## Troubleshooting + +### "Kernel version X.X is not supported" + +Your kernel is too old. Check current version: +```bash +uname -r +``` + +Options: +1. Upgrade your kernel to 5.x or higher +2. Use a different system with a newer kernel +3. Check your distribution's documentation for kernel upgrades + +### "Another instance may already be installed" + +The installer detected an existing installation. Options: + +**Option 1:** Remove the existing installation +```bash +sudo rm -rf /var/lib/nannyagent +``` + +**Option 2:** Check if it's actually running +```bash +ps aux | grep nannyagent +``` + +If running, stop it first, then remove the data directory. + +### "Cannot connect to Supabase" + +This is a warning, not an error. The installation will complete, but the agent won't work without connectivity. + +Check: +1. Is SUPABASE_PROJECT_URL set correctly? + ```bash + cat /etc/nannyagent/config.env + ``` + +2. Can you reach the URL? + ```bash + curl -I https://your-project.supabase.co + ``` + +3. Check firewall rules: + ```bash + sudo iptables -L -n | grep -i drop + ``` + +### "Go is not installed" + +The installer requires Go to build the binary. 
Install Go: + +**Ubuntu/Debian:** +```bash +sudo apt update +sudo apt install golang-go +``` + +**RHEL/CentOS/Fedora:** +```bash +sudo dnf install golang +``` + +Or download from: https://golang.org/dl/ + +### "eBPF tools installation failed" + +Check your package repositories: + +**Ubuntu/Debian:** +```bash +sudo apt update +sudo apt install bpfcc-tools bpftrace +``` + +**RHEL/Fedora:** +```bash +sudo dnf install bcc-tools bpftrace +``` + +## Security Considerations + +### Permissions + +The installer creates directories with restricted permissions: +- `/etc/nannyagent` - 755 (readable by all, writable by root) +- `/etc/nannyagent/config.env` - 600 (only root can read/write) +- `/var/lib/nannyagent` - 700 (only root can access) + +### Authentication Tokens + +Authentication tokens are stored securely in: +``` +/var/lib/nannyagent/token.json +``` + +Only root can access this file (permissions: 600). + +### Network Communication + +All communication with Supabase uses HTTPS (TLS encrypted). + +## Manual Installation (Alternative) + +If you prefer manual installation: + +```bash +# 1. Clone repository +git clone https://github.com/yourusername/nannyagent.git +cd nannyagent + +# 2. Install eBPF tools (Ubuntu/Debian) +sudo apt update +sudo apt install bpfcc-tools bpftrace linux-headers-$(uname -r) + +# 3. Build binary +go mod tidy +CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-w -s' -o nannyagent . + +# 4. Install +sudo cp nannyagent /usr/local/bin/ +sudo chmod 755 /usr/local/bin/nannyagent + +# 5. Create directories +sudo mkdir -p /etc/nannyagent +sudo mkdir -p /var/lib/nannyagent +sudo chmod 700 /var/lib/nannyagent + +# 6. 
Create configuration +sudo cat > /etc/nannyagent/config.env </dev/null; then + log_error "Container environment detected (Docker)" + log_error "NannyAgent does not support running inside containers or LXC" + exit 4 + fi + + if [ -f /proc/1/environ ] && grep -q "container=lxc" /proc/1/environ 2>/dev/null; then + log_error "LXC environment detected" + log_error "NannyAgent does not support running inside containers or LXC" + exit 4 + fi +} + +# Check kernel version (5.x or higher) +check_kernel_version() { + log_info "Checking kernel version..." + + KERNEL_VERSION=$(uname -r) + KERNEL_MAJOR=$(echo "$KERNEL_VERSION" | cut -d. -f1) + + log_info "Kernel version: $KERNEL_VERSION" + + if [ "$KERNEL_MAJOR" -lt 5 ]; then + log_error "Kernel version $KERNEL_VERSION is not supported" + log_error "NannyAgent requires Linux kernel 5.x or higher" + log_error "Current kernel: $KERNEL_VERSION (major version: $KERNEL_MAJOR)" + exit 5 + fi + + log_success "Kernel version $KERNEL_VERSION is supported" +} + +# Check if another instance is already installed +check_existing_installation() { + log_info "Checking for existing installation..." 
+ + # Check if lock file exists + if [ -f "$LOCKFILE" ]; then + log_error "An installation lock file exists at $LOCKFILE" + log_error "Another instance of NannyAgent may already be installed or running" + log_error "If you're sure no other instance exists, remove the lock file:" + log_error " sudo rm $LOCKFILE" + exit 6 + fi + + # Check if data directory exists and has files + if [ -d "$DATA_DIR" ]; then + FILE_COUNT=$(find "$DATA_DIR" -type f 2>/dev/null | wc -l) + if [ "$FILE_COUNT" -gt 0 ]; then + log_error "Data directory $DATA_DIR already exists with $FILE_COUNT files" + log_error "Another instance of NannyAgent may already be installed" + log_error "To reinstall, please remove the data directory first:" + log_error " sudo rm -rf $DATA_DIR" + exit 6 + fi + fi + + # Check if binary already exists + if [ -f "$INSTALL_DIR/$BINARY_NAME" ]; then + log_warning "Binary $INSTALL_DIR/$BINARY_NAME already exists" + log_warning "It will be replaced with the new version" + fi + + log_success "No conflicting installation found" +} + +# Install required dependencies (eBPF tools) +install_dependencies() { + log_info "Installing eBPF dependencies..." + + # Detect package manager + if command -v apt-get &> /dev/null; then + PKG_MANAGER="apt-get" + log_info "Detected Debian/Ubuntu system" + + # Update package list + log_info "Updating package list..." + apt-get update -qq || { + log_error "Failed to update package list" + exit 7 + } + + # Install bpfcc-tools and bpftrace + log_info "Installing bpfcc-tools and bpftrace..." + DEBIAN_FRONTEND=noninteractive apt-get install -y -qq bpfcc-tools bpftrace linux-headers-$(uname -r) 2>&1 | grep -v "^Reading" | grep -v "^Building" || { + log_error "Failed to install eBPF tools" + exit 7 + } + + elif command -v dnf &> /dev/null; then + PKG_MANAGER="dnf" + log_info "Detected Fedora/RHEL 8+ system" + + log_info "Installing bcc-tools and bpftrace..." 
+ dnf install -y -q bcc-tools bpftrace kernel-devel 2>&1 | grep -v "^Last metadata" || { + log_error "Failed to install eBPF tools" + exit 7 + } + + elif command -v yum &> /dev/null; then + PKG_MANAGER="yum" + log_info "Detected CentOS/RHEL 7 system" + + log_info "Installing bcc-tools and bpftrace..." + yum install -y -q bcc-tools bpftrace kernel-devel 2>&1 | grep -v "^Loaded plugins" || { + log_error "Failed to install eBPF tools" + exit 7 + } + + else + log_error "Unsupported package manager" + log_error "Please install 'bpfcc-tools' and 'bpftrace' manually" + exit 7 + fi + + # Verify installations + if ! command -v bpftrace &> /dev/null; then + log_error "bpftrace installation failed or not in PATH" + exit 7 + fi + + # Check for BCC tools (RedHat systems may have them in /usr/share/bcc/tools/) + if [ -d "/usr/share/bcc/tools" ]; then + log_info "BCC tools found at /usr/share/bcc/tools/" + # Add to PATH if not already there + if [[ ":$PATH:" != *":/usr/share/bcc/tools:"* ]]; then + export PATH="/usr/share/bcc/tools:$PATH" + log_info "Added /usr/share/bcc/tools to PATH" + fi + fi + + log_success "eBPF tools installed successfully" +} + +# Check Go installation +check_go() { + log_info "Checking for Go installation..." + + if ! command -v go &> /dev/null; then + log_error "Go is not installed" + log_error "Please install Go 1.23 or higher from https://golang.org/dl/" + exit 8 + fi + + GO_VERSION=$(go version | awk '{print $3}' | sed 's/go//') + log_info "Go version: $GO_VERSION" + log_success "Go is installed" +} + +# Build the binary +build_binary() { + log_info "Building NannyAgent binary for $ARCH architecture..." + + # Check if go.mod exists + if [ ! -f "go.mod" ]; then + log_error "go.mod not found. Are you in the correct directory?" + exit 9 + fi + + # Get Go dependencies + log_info "Downloading Go dependencies..." 
+ go mod download || { + log_error "Failed to download Go dependencies" + exit 9 + } + + # Build the binary for the current architecture + log_info "Compiling binary for $ARCH..." + CGO_ENABLED=0 GOOS=linux GOARCH="$ARCH" go build -a -installsuffix cgo \ + -ldflags "-w -s -X main.Version=$VERSION" \ + -o "$BINARY_NAME" . || { + log_error "Failed to build binary for $ARCH" + exit 9 + } + + # Verify binary was created + if [ ! -f "$BINARY_NAME" ]; then + log_error "Binary not found after build" + exit 9 + fi + + # Verify binary is executable + chmod +x "$BINARY_NAME" + + # Test the binary + if ./"$BINARY_NAME" --version &>/dev/null; then + log_success "Binary built and tested successfully for $ARCH" + else + log_error "Binary build succeeded but execution test failed" + exit 9 + fi +} + +# Check connectivity to Supabase +check_connectivity() { + log_info "Checking connectivity to Supabase..." + + # Load SUPABASE_PROJECT_URL from .env if it exists + if [ -f ".env" ]; then + source .env 2>/dev/null || true + fi + + if [ -z "$SUPABASE_PROJECT_URL" ]; then + log_warning "SUPABASE_PROJECT_URL not set in .env file" + log_warning "The agent may not work without proper configuration" + log_warning "Please configure $CONFIG_DIR/config.env after installation" + return + fi + + log_info "Testing connection to $SUPABASE_PROJECT_URL..." 
+ + # Try to reach the Supabase endpoint + if command -v curl &> /dev/null; then + HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "$SUPABASE_PROJECT_URL" || echo "000") + + if [ "$HTTP_CODE" = "000" ]; then + log_warning "Cannot connect to $SUPABASE_PROJECT_URL" + log_warning "Network connectivity issue detected" + log_warning "The agent will not work without connectivity to Supabase" + log_warning "Please check your network configuration and firewall settings" + elif [ "$HTTP_CODE" = "404" ] || [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "301" ] || [ "$HTTP_CODE" = "302" ]; then + log_success "Successfully connected to Supabase (HTTP $HTTP_CODE)" + else + log_warning "Received HTTP $HTTP_CODE from $SUPABASE_PROJECT_URL" + log_warning "The agent may not work correctly" + fi + else + log_warning "curl not found, skipping connectivity check" + fi +} + +# Create necessary directories +create_directories() { + log_info "Creating directories..." + + # Create config directory + mkdir -p "$CONFIG_DIR" || { + log_error "Failed to create config directory: $CONFIG_DIR" + exit 10 + } + + # Create data directory with restricted permissions + mkdir -p "$DATA_DIR" || { + log_error "Failed to create data directory: $DATA_DIR" + exit 10 + } + chmod 700 "$DATA_DIR" + + log_success "Directories created successfully" +} + +# Install the binary +install_binary() { + log_info "Installing binary to $INSTALL_DIR..." + + # Copy binary + cp "$BINARY_NAME" "$INSTALL_DIR/$BINARY_NAME" || { + log_error "Failed to copy binary to $INSTALL_DIR" + exit 11 + } + + # Set permissions + chmod 755 "$INSTALL_DIR/$BINARY_NAME" + + # Copy .env to config if it exists + if [ -f ".env" ]; then + log_info "Copying configuration to $CONFIG_DIR..." 
+ cp .env "$CONFIG_DIR/config.env" + chmod 600 "$CONFIG_DIR/config.env" + fi + + # Create lock file + touch "$LOCKFILE" + echo "Installed at $(date)" > "$LOCKFILE" + + log_success "Binary installed successfully" +} + +# Display post-installation information +post_install_info() { + echo "" + log_success "NannyAgent v$VERSION installed successfully!" + echo "" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" + echo " Configuration: $CONFIG_DIR/config.env" + echo " Data Directory: $DATA_DIR" + echo " Binary Location: $INSTALL_DIR/$BINARY_NAME" + echo "" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" + echo "Next steps:" + echo "" + echo " 1. Configure your Supabase URL in $CONFIG_DIR/config.env" + echo " 2. Run the agent: sudo $BINARY_NAME" + echo " 3. Check version: $BINARY_NAME --version" + echo " 4. 
Get help: $BINARY_NAME --help" + echo "" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" +} + +# Main installation flow +main() { + echo "" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo " NannyAgent Installer v$VERSION" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" + + check_root + detect_platform + check_kernel_version + check_existing_installation + install_dependencies + check_go + build_binary + check_connectivity + create_directories + install_binary + post_install_info +} + +# Run main installation +main diff --git a/internal/config/config.go b/internal/config/config.go index 26ba0e8..db1d16a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -37,16 +37,40 @@ var DefaultConfig = Config{ func LoadConfig() (*Config, error) { config := DefaultConfig - // Try to load .env file from current directory or parent directories - envFile := findEnvFile() - if envFile != "" { - if err := godotenv.Load(envFile); err != nil { - logging.Warning("Could not load .env file from %s: %v", envFile, err) + // Priority order for loading configuration: + // 1. /etc/nannyagent/config.env (system-wide installation) + // 2. Current directory .env file (development) + // 3. 
Parent directory .env file (development) + + configLoaded := false + + // Try system-wide config first + if _, err := os.Stat("/etc/nannyagent/config.env"); err == nil { + if err := godotenv.Load("/etc/nannyagent/config.env"); err != nil { + logging.Warning("Could not load /etc/nannyagent/config.env: %v", err) } else { - logging.Info("Loaded configuration from %s", envFile) + logging.Info("Loaded configuration from /etc/nannyagent/config.env") + configLoaded = true } } + // If system config not found, try local .env file + if !configLoaded { + envFile := findEnvFile() + if envFile != "" { + if err := godotenv.Load(envFile); err != nil { + logging.Warning("Could not load .env file from %s: %v", envFile, err) + } else { + logging.Info("Loaded configuration from %s", envFile) + configLoaded = true + } + } + } + + if !configLoaded { + logging.Warning("No configuration file found. Using environment variables only.") + } + // Load from environment variables if url := os.Getenv("SUPABASE_PROJECT_URL"); url != "" { config.SupabaseProjectURL = url diff --git a/main.go b/main.go index f4ca0ff..f762c43 100644 --- a/main.go +++ b/main.go @@ -2,6 +2,7 @@ package main import ( "bufio" + "flag" "fmt" "log" "os" @@ -19,7 +20,48 @@ import ( "nannyagentv2/internal/websocket" ) -const Version = "v2.0.0" +const Version = "0.0.1" + +// showVersion displays the version information +func showVersion() { + fmt.Printf("nannyagent version %s\n", Version) + fmt.Println("Linux diagnostic agent with eBPF capabilities") + os.Exit(0) +} + +// showHelp displays the help information +func showHelp() { + fmt.Println("NannyAgent - Linux Diagnostic Agent with eBPF Monitoring") + fmt.Printf("Version: %s\n\n", Version) + fmt.Println("USAGE:") + fmt.Printf(" sudo %s [OPTIONS]\n\n", os.Args[0]) + fmt.Println("OPTIONS:") + fmt.Println(" --version, -v Show version information") + fmt.Println(" --help, -h Show this help message") + fmt.Println() + fmt.Println("DESCRIPTION:") + fmt.Println(" NannyAgent is an 
AI-powered Linux diagnostic tool that uses eBPF") + fmt.Println(" for deep system monitoring and analysis. It requires root privileges") + fmt.Println(" to run for eBPF functionality.") + fmt.Println() + fmt.Println("REQUIREMENTS:") + fmt.Println(" - Linux kernel 5.x or higher") + fmt.Println(" - Root privileges (sudo)") + fmt.Println(" - bpftrace and bpfcc-tools installed") + fmt.Println(" - Network connectivity to Supabase") + fmt.Println() + fmt.Println("CONFIGURATION:") + fmt.Println(" Configuration file: /etc/nannyagent/config.env") + fmt.Println(" Data directory: /var/lib/nannyagent") + fmt.Println() + fmt.Println("EXAMPLES:") + fmt.Printf(" # Run the agent\n") + fmt.Printf(" sudo %s\n\n", os.Args[0]) + fmt.Printf(" # Show version (no sudo required)\n") + fmt.Printf(" %s --version\n\n", os.Args[0]) + fmt.Println("For more information, visit: https://github.com/yourusername/nannyagent") + os.Exit(0) +} // checkRootPrivileges ensures the program is running as root func checkRootPrivileges() { @@ -31,7 +73,7 @@ func checkRootPrivileges() { } } -// checkKernelVersionCompatibility ensures kernel version is 4.4 or higher +// checkKernelVersionCompatibility ensures kernel version is 5.x or higher func checkKernelVersionCompatibility() { output, err := exec.Command("uname", "-r").Output() if err != nil { @@ -54,18 +96,12 @@ func checkKernelVersionCompatibility() { os.Exit(1) } - minor, err := strconv.Atoi(parts[1]) - if err != nil { - logging.Error("Cannot parse minor kernel version: %s", parts[1]) - os.Exit(1) - } - - // Check if kernel is 4.4 or higher - if major < 4 || (major == 4 && minor < 4) { - logging.Error("Kernel version %s is too old for eBPF", kernelVersion) - logging.Error("Required: Linux kernel 4.4 or higher") - logging.Error("Current: %s", kernelVersion) - logging.Error("Reason: eBPF requires kernel features introduced in 4.4+:\n - BPF system call support\n - eBPF program types (kprobe, tracepoint)\n - BPF maps and helper functions") + // Check if 
kernel is 5.x or higher + if major < 5 { + logging.Error("Kernel version %s is not supported", kernelVersion) + logging.Error("Required: Linux kernel 5.x or higher") + logging.Error("Current: %s (major version: %d)", kernelVersion, major) + logging.Error("Reason: NannyAgent requires modern kernel features:\n - Advanced eBPF capabilities\n - BTF (BPF Type Format) support\n - Enhanced security and stability") os.Exit(1) } } @@ -126,6 +162,23 @@ func runInteractiveDiagnostics(agent *LinuxDiagnosticAgent) { } func main() { + // Define flags with both long and short versions + versionFlag := flag.Bool("version", false, "Show version information") + versionFlagShort := flag.Bool("v", false, "Show version information (short)") + helpFlag := flag.Bool("help", false, "Show help information") + helpFlagShort := flag.Bool("h", false, "Show help information (short)") + flag.Parse() + + // Handle --version or -v flag (no root required) + if *versionFlag || *versionFlagShort { + showVersion() + } + + // Handle --help or -h flag (no root required) + if *helpFlag || *helpFlagShort { + showHelp() + } + logging.Info("NannyAgent v%s starting...", Version) // Perform system compatibility checks first diff --git a/scripts/debug_trace_script.sh b/scripts/debug_trace_script.sh deleted file mode 100755 index bcc5e0f..0000000 --- a/scripts/debug_trace_script.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# Test the current script generation -echo "Testing tracepoint script generation..." 
- -# Simulate what the failing test does -echo "Target: syscalls:sys_enter_openat" -echo "ProbeType: t" -echo "" -echo "Generated bpftrace script would be:" -echo "tracepoint:syscalls:sys_enter_openat {" -echo " printf(\"TRACE|%d|%d|%d|%s|syscalls:sys_enter_openat|file access\\n\", nsecs, pid, tid, comm, arg2@user);" -echo "}" -echo "" -echo "This is INVALID - should be:" -echo "tracepoint:syscalls:sys_enter_openat {" -echo " printf(\"TRACE|%d|%d|%d|%s|openat|file access\\n\", nsecs, pid, tid, comm);" -echo "}" - diff --git a/scripts/demo_ebpf_integration.sh b/scripts/demo_ebpf_integration.sh deleted file mode 100755 index 9e9ac86..0000000 --- a/scripts/demo_ebpf_integration.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/bash - -# Test the eBPF-enhanced NannyAgent -# This script demonstrates the new eBPF integration capabilities - -set -e - -echo "๐Ÿ”ฌ Testing eBPF-Enhanced NannyAgent" -echo "==================================" -echo "" - -AGENT="./nannyagent-ebpf" - -if [ ! -f "$AGENT" ]; then - echo "Building agent..." - go build -o nannyagent-ebpf . -fi - -echo "1. Checking eBPF Capabilities" -echo "-----------------------------" -./ebpf_helper.sh check -echo "" - -echo "2. Testing eBPF Manager Initialization" -echo "-------------------------------------" -echo "Starting agent in test mode..." -echo "" - -# Create a test script that will send a predefined issue to test eBPF -cat > /tmp/test_ebpf_issue.txt << 'EOF' -Network connection timeouts to external services. Applications report intermittent failures when trying to connect to remote APIs. The issue occurs randomly and affects multiple processes. -EOF - -echo "Test Issue: Network connection timeouts" -echo "Expected eBPF Programs: Network tracing, syscall monitoring" -echo "" - -echo "3. 
Demonstration of eBPF Program Suggestions" -echo "-------------------------------------------" - -# Show what eBPF programs would be suggested for different issues -echo "For NETWORK issues - Expected eBPF programs:" -echo "- tracepoint:syscalls/sys_enter_connect (network connections)" -echo "- kprobe:tcp_connect (TCP connection attempts)" -echo "- kprobe:tcp_sendmsg (network send operations)" -echo "" - -echo "For PROCESS issues - Expected eBPF programs:" -echo "- tracepoint:syscalls/sys_enter_execve (process execution)" -echo "- tracepoint:sched/sched_process_exit (process termination)" -echo "- kprobe:do_fork (process creation)" -echo "" - -echo "For FILE issues - Expected eBPF programs:" -echo "- tracepoint:syscalls/sys_enter_openat (file opens)" -echo "- kprobe:vfs_read (file reads)" -echo "- kprobe:vfs_write (file writes)" -echo "" - -echo "For PERFORMANCE issues - Expected eBPF programs:" -echo "- tracepoint:syscalls/sys_enter_* (syscall frequency analysis)" -echo "- kprobe:schedule (CPU scheduling events)" -echo "" - -echo "4. eBPF Integration Features" -echo "---------------------------" -echo "โœ“ Cilium eBPF library integration" -echo "โœ“ bpftrace-based program execution" -echo "โœ“ Dynamic program generation based on issue type" -echo "โœ“ Parallel execution with regular diagnostic commands" -echo "โœ“ Structured JSON event collection" -echo "โœ“ AI-driven eBPF program selection" -echo "" - -echo "5. 
Example AI Response with eBPF" -echo "-------------------------------" -cat << 'EOF' -{ - "response_type": "diagnostic", - "reasoning": "Network timeout issues require monitoring TCP connections and system calls to identify bottlenecks", - "commands": [ - {"id": "net_status", "command": "ss -tulpn", "description": "Current network connections"}, - {"id": "net_config", "command": "ip route show", "description": "Network configuration"} - ], - "ebpf_programs": [ - { - "name": "tcp_connect_monitor", - "type": "kprobe", - "target": "tcp_connect", - "duration": 15, - "description": "Monitor TCP connection attempts" - }, - { - "name": "syscall_network", - "type": "tracepoint", - "target": "syscalls/sys_enter_connect", - "duration": 15, - "filters": {"comm": "curl"}, - "description": "Monitor network-related system calls" - } - ] -} -EOF -echo "" - -echo "6. Security and Safety" -echo "--------------------" -echo "โœ“ eBPF programs are read-only and time-limited" -echo "โœ“ No system modification capabilities" -echo "โœ“ Automatic cleanup after execution" -echo "โœ“ Safe execution in containers and restricted environments" -echo "โœ“ Graceful fallback when eBPF is not available" -echo "" - -echo "7. Next Steps" -echo "------------" -echo "To test the full eBPF integration:" -echo "" -echo "a) Run with root privileges for full eBPF access:" -echo " sudo $AGENT" -echo "" -echo "b) Try these test scenarios:" -echo " - 'Network connection timeouts'" -echo " - 'High CPU usage and slow performance'" -echo " - 'File permission errors'" -echo " - 'Process hanging or not responding'" -echo "" -echo "c) Install additional eBPF tools:" -echo " sudo ./ebpf_helper.sh install" -echo "" - -echo "๐ŸŽฏ eBPF Integration Complete!" 
-echo "" -echo "The agent now supports:" -echo "- Dynamic eBPF program compilation and execution" -echo "- AI-driven selection of appropriate tracepoints and kprobes" -echo "- Real-time system event monitoring during diagnosis" -echo "- Integration with Cilium eBPF library for professional-grade monitoring" -echo "" -echo "This provides unprecedented visibility into system behavior" -echo "for accurate root cause analysis and issue resolution." diff --git a/scripts/discover-functions.sh b/scripts/discover-functions.sh deleted file mode 100755 index 8117cda..0000000 --- a/scripts/discover-functions.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash - -# NannyAPI Function Discovery Script -# This script helps you find the correct function name for your NannyAPI setup - -echo "๐Ÿ” NannyAPI Function Discovery" -echo "==============================" -echo "" - -ENDPOINT="${NANNYAPI_ENDPOINT:-http://tensorzero.netcup.internal:3000/openai/v1}" - -echo "Testing endpoint: $ENDPOINT/chat/completions" -echo "" - -# Test common function name patterns -test_functions=( - "nannyapi::function_name::diagnose" - "nannyapi::function_name::diagnose_and_heal" - "nannyapi::function_name::linux_diagnostic" - "nannyapi::function_name::system_diagnostic" - "nannyapi::model_name::gpt-4" - "nannyapi::model_name::claude" -) - -for func in "${test_functions[@]}"; do - echo "Testing function: $func" - - response=$(curl -s -X POST "$ENDPOINT/chat/completions" \ - -H "Content-Type: application/json" \ - -d "{\"model\":\"$func\",\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}") - - if echo "$response" | grep -q "Unknown function"; then - echo " โŒ Function not found" - elif echo "$response" | grep -q "error"; then - echo " โš ๏ธ Error: $(echo "$response" | jq -r '.error' 2>/dev/null || echo "$response")" - else - echo " โœ… Function exists and responding!" 
- echo " Use this in your environment: export NANNYAPI_MODEL=\"$func\"" - fi - echo "" -done - -echo "๐Ÿ’ก If none of the above work, check your NannyAPI configuration file" -echo " for the correct function names and update NANNYAPI_MODEL accordingly." -echo "" -echo "Example NannyAPI config snippet:" -echo "```yaml" -echo "functions:" -echo " diagnose_and_heal: # This becomes 'nannyapi::function_name::diagnose_and_heal'" -echo " # function definition" -echo "```" diff --git a/scripts/ebpf_helper.sh b/scripts/ebpf_helper.sh deleted file mode 100755 index e024148..0000000 --- a/scripts/ebpf_helper.sh +++ /dev/null @@ -1,296 +0,0 @@ -#!/bin/bash - -# eBPF Helper Scripts for NannyAgent -# This script contains various eBPF programs and helpers for system monitoring - -# Check if running as root (required for most eBPF operations) -check_root() { - if [ "$EUID" -ne 0 ]; then - echo "Warning: Many eBPF operations require root privileges" - echo "Consider running with sudo for full functionality" - fi -} - -# Install eBPF tools if not present -install_ebpf_tools() { - echo "Installing eBPF tools..." - - # Detect package manager and install appropriate packages - if command -v apt-get >/dev/null 2>&1; then - # Ubuntu/Debian - echo "Detected Ubuntu/Debian system" - apt-get update - apt-get install -y bpftrace linux-tools-generic linux-tools-$(uname -r) || true - apt-get install -y bcc-tools python3-bcc || true - elif command -v yum >/dev/null 2>&1; then - # RHEL/CentOS 7 - echo "Detected RHEL/CentOS system" - yum install -y bpftrace perf || true - elif command -v dnf >/dev/null 2>&1; then - # RHEL/CentOS 8+/Fedora - echo "Detected Fedora/RHEL 8+ system" - dnf install -y bpftrace perf bcc-tools python3-bcc || true - elif command -v zypper >/dev/null 2>&1; then - # openSUSE - echo "Detected openSUSE system" - zypper install -y bpftrace perf || true - else - echo "Unknown package manager. 
Please install eBPF tools manually:" - echo "- bpftrace" - echo "- perf (linux-tools)" - echo "- BCC tools (optional)" - fi -} - -# Check eBPF capabilities of the current system -check_ebpf_capabilities() { - echo "Checking eBPF capabilities..." - - # Check kernel version - kernel_version=$(uname -r) - echo "Kernel version: $kernel_version" - - # Check if eBPF is enabled in kernel - if [ -f /proc/config.gz ]; then - if zcat /proc/config.gz | grep -q "CONFIG_BPF=y"; then - echo "โœ“ eBPF support enabled in kernel" - else - echo "โœ— eBPF support not found in kernel config" - fi - elif [ -f "/boot/config-$(uname -r)" ]; then - if grep -q "CONFIG_BPF=y" "/boot/config-$(uname -r)"; then - echo "โœ“ eBPF support enabled in kernel" - else - echo "โœ— eBPF support not found in kernel config" - fi - else - echo "? Unable to check kernel eBPF config" - fi - - # Check available tools - echo "" - echo "Available eBPF tools:" - - tools=("bpftrace" "perf" "execsnoop" "opensnoop" "tcpconnect" "biotop") - for tool in "${tools[@]}"; do - if command -v "$tool" >/dev/null 2>&1; then - echo "โœ“ $tool" - else - echo "โœ— $tool" - fi - done - - # Check debugfs mount - if mount | grep -q debugfs; then - echo "โœ“ debugfs mounted" - else - echo "โœ— debugfs not mounted (required for ftrace)" - echo " To mount: sudo mount -t debugfs none /sys/kernel/debug" - fi - - # Check if we can load eBPF programs - echo "" - echo "Testing eBPF program loading..." - if bpftrace -e 'BEGIN { print("eBPF test successful"); exit(); }' >/dev/null 2>&1; then - echo "โœ“ eBPF program loading works" - else - echo "โœ— eBPF program loading failed (may need root privileges)" - fi -} - -# Create simple syscall monitoring script -create_syscall_monitor() { - cat > /tmp/nannyagent_syscall_monitor.bt << 'EOF' -#!/usr/bin/env bpftrace - -BEGIN { - printf("Monitoring syscalls... 
Press Ctrl-C to stop\n"); - printf("[\n"); -} - -tracepoint:syscalls:sys_enter_* { - printf("{\"timestamp\":%llu,\"event_type\":\"syscall_enter\",\"process_id\":%d,\"process_name\":\"%s\",\"syscall\":\"%s\",\"user_id\":%d},\n", - nsecs, pid, comm, probe, uid); -} - -END { - printf("]\n"); -} -EOF - - chmod +x /tmp/nannyagent_syscall_monitor.bt - echo "Syscall monitor created: /tmp/nannyagent_syscall_monitor.bt" -} - -# Create network activity monitor -create_network_monitor() { - cat > /tmp/nannyagent_network_monitor.bt << 'EOF' -#!/usr/bin/env bpftrace - -BEGIN { - printf("Monitoring network activity... Press Ctrl-C to stop\n"); - printf("[\n"); -} - -kprobe:tcp_sendmsg, -kprobe:tcp_recvmsg, -kprobe:udp_sendmsg, -kprobe:udp_recvmsg { - $action = (probe =~ /send/ ? "send" : "recv"); - $protocol = (probe =~ /tcp/ ? "tcp" : "udp"); - printf("{\"timestamp\":%llu,\"event_type\":\"network_%s\",\"protocol\":\"%s\",\"process_id\":%d,\"process_name\":\"%s\"},\n", - nsecs, $action, $protocol, pid, comm); -} - -END { - printf("]\n"); -} -EOF - - chmod +x /tmp/nannyagent_network_monitor.bt - echo "Network monitor created: /tmp/nannyagent_network_monitor.bt" -} - -# Create file access monitor -create_file_monitor() { - cat > /tmp/nannyagent_file_monitor.bt << 'EOF' -#!/usr/bin/env bpftrace - -BEGIN { - printf("Monitoring file access... 
Press Ctrl-C to stop\n"); - printf("[\n"); -} - -tracepoint:syscalls:sys_enter_openat { - printf("{\"timestamp\":%llu,\"event_type\":\"file_open\",\"process_id\":%d,\"process_name\":\"%s\",\"filename\":\"%s\",\"flags\":%d},\n", - nsecs, pid, comm, str(args->pathname), args->flags); -} - -tracepoint:syscalls:sys_enter_unlinkat { - printf("{\"timestamp\":%llu,\"event_type\":\"file_delete\",\"process_id\":%d,\"process_name\":\"%s\",\"filename\":\"%s\"},\n", - nsecs, pid, comm, str(args->pathname)); -} - -END { - printf("]\n"); -} -EOF - - chmod +x /tmp/nannyagent_file_monitor.bt - echo "File monitor created: /tmp/nannyagent_file_monitor.bt" -} - -# Create process monitor -create_process_monitor() { - cat > /tmp/nannyagent_process_monitor.bt << 'EOF' -#!/usr/bin/env bpftrace - -BEGIN { - printf("Monitoring process activity... Press Ctrl-C to stop\n"); - printf("[\n"); -} - -tracepoint:syscalls:sys_enter_execve { - printf("{\"timestamp\":%llu,\"event_type\":\"process_exec\",\"process_id\":%d,\"process_name\":\"%s\",\"filename\":\"%s\"},\n", - nsecs, pid, comm, str(args->filename)); -} - -tracepoint:sched:sched_process_exit { - printf("{\"timestamp\":%llu,\"event_type\":\"process_exit\",\"process_id\":%d,\"process_name\":\"%s\",\"exit_code\":%d},\n", - nsecs, args->pid, args->comm, args->code); -} - -END { - printf("]\n"); -} -EOF - - chmod +x /tmp/nannyagent_process_monitor.bt - echo "Process monitor created: /tmp/nannyagent_process_monitor.bt" -} - -# Performance monitoring setup -setup_performance_monitoring() { - echo "Setting up performance monitoring..." - - # Create performance monitoring script - cat > /tmp/nannyagent_perf_monitor.sh << 'EOF' -#!/bin/bash - -DURATION=${1:-10} -OUTPUT_FILE=${2:-/tmp/nannyagent_perf_output.json} - -echo "Running performance monitoring for $DURATION seconds..." 
-echo "[" > "$OUTPUT_FILE" - -# Sample system performance every second -for i in $(seq 1 $DURATION); do - timestamp=$(date +%s)000000000 - cpu_percent=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1) - memory_percent=$(free | grep Mem | awk '{printf "%.1f", $3/$2 * 100.0}') - load_avg=$(uptime | awk -F'load average:' '{print $2}' | xargs) - - echo "{\"timestamp\":$timestamp,\"event_type\":\"performance_sample\",\"cpu_percent\":\"$cpu_percent\",\"memory_percent\":\"$memory_percent\",\"load_avg\":\"$load_avg\"}," >> "$OUTPUT_FILE" - - [ $i -lt $DURATION ] && sleep 1 -done - -echo "]" >> "$OUTPUT_FILE" -echo "Performance data saved to $OUTPUT_FILE" -EOF - - chmod +x /tmp/nannyagent_perf_monitor.sh - echo "Performance monitor created: /tmp/nannyagent_perf_monitor.sh" -} - -# Main function -main() { - check_root - - case "${1:-help}" in - "install") - install_ebpf_tools - ;; - "check") - check_ebpf_capabilities - ;; - "setup") - echo "Setting up eBPF monitoring scripts..." - create_syscall_monitor - create_network_monitor - create_file_monitor - create_process_monitor - setup_performance_monitoring - echo "All eBPF monitoring scripts created in /tmp/" - ;; - "test") - echo "Testing eBPF functionality..." - check_ebpf_capabilities - if command -v bpftrace >/dev/null 2>&1; then - echo "Running quick eBPF test..." 
- timeout 5s bpftrace -e 'BEGIN { print("eBPF is working!"); } tracepoint:syscalls:sys_enter_openat { @[comm] = count(); } END { print(@); clear(@); }' - fi - ;; - "help"|*) - echo "eBPF Helper Script for NannyAgent" - echo "" - echo "Usage: $0 [command]" - echo "" - echo "Commands:" - echo " install - Install eBPF tools on the system" - echo " check - Check eBPF capabilities" - echo " setup - Create eBPF monitoring scripts" - echo " test - Test eBPF functionality" - echo " help - Show this help message" - echo "" - echo "Examples:" - echo " $0 check # Check what eBPF tools are available" - echo " $0 install # Install eBPF tools (requires root)" - echo " $0 setup # Create monitoring scripts" - echo " $0 test # Test eBPF functionality" - ;; - esac -} - -# Run main function with all arguments -main "$@" diff --git a/scripts/install.sh b/scripts/install.sh deleted file mode 100755 index c51649b..0000000 --- a/scripts/install.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash - -# Linux Diagnostic Agent Installation Script -# This script installs the nanny-agent on a Linux system - -set -e - -echo "๐Ÿ”ง Linux Diagnostic Agent Installation Script" -echo "==============================================" - -# Check if Go is installed -if ! command -v go &> /dev/null; then - echo "โŒ Go is not installed. Please install Go first:" - echo "" - echo "For Ubuntu/Debian:" - echo " sudo apt update && sudo apt install golang-go" - echo "" - echo "For RHEL/CentOS/Fedora:" - echo " sudo dnf install golang" - echo " # or" - echo " sudo yum install golang" - echo "" - exit 1 -fi - -echo "โœ… Go is installed: $(go version)" - -# Build the application -echo "๐Ÿ”จ Building the application..." -go mod tidy -make build - -# Check if build was successful -if [ ! -f "./nanny-agent" ]; then - echo "โŒ Build failed! nanny-agent binary not found." - exit 1 -fi - -echo "โœ… Build successful!" - -# Ask for installation preference -echo "" -echo "Installation options:" -echo "1. 
Install system-wide (/usr/local/bin) - requires sudo" -echo "2. Keep in current directory" -echo "" -read -p "Choose option (1 or 2): " choice - -case $choice in - 1) - echo "๐Ÿ“ฆ Installing system-wide..." - sudo cp nanny-agent /usr/local/bin/ - sudo chmod +x /usr/local/bin/nanny-agent - echo "โœ… Agent installed to /usr/local/bin/nanny-agent" - echo "" - echo "You can now run the agent from anywhere with:" - echo " nanny-agent" - ;; - 2) - echo "โœ… Agent ready in current directory" - echo "" - echo "Run the agent with:" - echo " ./nanny-agent" - ;; - *) - echo "โŒ Invalid choice. Agent is available in current directory." - echo "Run with: ./nanny-agent" - ;; -esac - -# Configuration -echo "" -echo "๐Ÿ“ Configuration:" -echo "Set these environment variables to configure the agent:" -echo "" -echo "export NANNYAPI_ENDPOINT=\"http://your-nannyapi-host:3000/openai/v1\"" -echo "export NANNYAPI_MODEL=\"your-model-identifier\"" -echo "" -echo "Or create a .env file in the working directory." -echo "" -echo "๐ŸŽ‰ Installation complete!" -echo "" -echo "Example usage:" -echo " ./nanny-agent" -echo " > On /var filesystem I cannot create any file but df -h shows 30% free space available." 
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh deleted file mode 100755 index 507d588..0000000 --- a/scripts/integration-tests.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# Linux Diagnostic Agent - Integration Tests -# This script creates realistic Linux problem scenarios for testing - -set -e - -AGENT_BINARY="./nanny-agent" -TEST_DIR="/tmp/nanny-agent-tests" -TEST_LOG="$TEST_DIR/integration_test.log" - -# Color codes for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Ensure test directory exists -mkdir -p "$TEST_DIR" - -echo -e "${BLUE}๐Ÿงช Linux Diagnostic Agent - Integration Tests${NC}" -echo "=================================================" -echo "" - -# Check if agent binary exists -if [[ ! -f "$AGENT_BINARY" ]]; then - echo -e "${RED}โŒ Agent binary not found at $AGENT_BINARY${NC}" - echo "Please run: make build" - exit 1 -fi - -# Function to run a test scenario -run_test() { - local test_name="$1" - local scenario="$2" - local expected_keywords="$3" - - echo -e "${YELLOW}๐Ÿ“‹ Test: $test_name${NC}" - echo "Scenario: $scenario" - echo "" - - # Run the agent with the scenario - echo "$scenario" | timeout 120s "$AGENT_BINARY" > "$TEST_LOG" 2>&1 || true - - # Check if any expected keywords are found in the output - local found_keywords=0 - IFS=',' read -ra KEYWORDS <<< "$expected_keywords" - for keyword in "${KEYWORDS[@]}"; do - keyword=$(echo "$keyword" | xargs) # trim whitespace - if grep -qi "$keyword" "$TEST_LOG"; then - echo -e "${GREEN} โœ… Found expected keyword: $keyword${NC}" - ((found_keywords++)) - else - echo -e "${RED} โŒ Missing keyword: $keyword${NC}" - fi - done - - # Show summary - if [[ $found_keywords -gt 0 ]]; then - echo -e "${GREEN} โœ… Test PASSED ($found_keywords keywords found)${NC}" - else - echo -e "${RED} โŒ Test FAILED (no expected keywords found)${NC}" - fi - - echo "" - echo "Full output saved to: $TEST_LOG" - echo 
"----------------------------------------" - echo "" -} - -# Test Scenario 1: Disk Space Issues (Inode Exhaustion) -run_test "Disk Space - Inode Exhaustion" \ - "I cannot create new files in /home directory even though df -h shows plenty of space available. Getting 'No space left on device' error when trying to touch new files." \ - "inode,df -i,filesystem,inodes,exhausted" - -# Test Scenario 2: Memory Issues -run_test "Memory Issues - OOM Killer" \ - "My applications keep getting killed randomly and I see 'killed' messages in logs. The system becomes unresponsive for a few seconds before recovering. This happens especially when running memory-intensive tasks." \ - "memory,oom,killed,dmesg,free,swap" - -# Test Scenario 3: Network Connectivity Issues -run_test "Network Connectivity - DNS Resolution" \ - "I can ping IP addresses directly (like 8.8.8.8) but cannot resolve domain names. Web browsing fails with DNS resolution errors, but ping 8.8.8.8 works fine." \ - "dns,resolv.conf,nslookup,nameserver,dig" - -# Test Scenario 4: Service/Process Issues -run_test "Service Issues - High Load" \ - "System load average is consistently above 10.0 even when CPU usage appears normal. Applications are responding slowly and I notice high wait times. The server feels sluggish overall." \ - "load,average,cpu,iostat,vmstat,processes" - -# Test Scenario 5: File System Issues -run_test "Filesystem Issues - Permission Problems" \ - "Web server returns 403 Forbidden errors for all pages. Files exist and seem readable, but nginx logs show permission denied errors. SELinux is disabled and file permissions look correct." \ - "permission,403,nginx,chmod,chown,selinux" - -# Test Scenario 6: Boot/System Issues -run_test "Boot Issues - Kernel Module" \ - "System boots but some hardware devices are not working. Network interface shows as down, USB devices are not recognized, and dmesg shows module loading failures." 
\ - "module,lsmod,dmesg,hardware,interface,usb" - -# Test Scenario 7: Performance Issues -run_test "Performance Issues - I/O Bottleneck" \ - "Database queries are extremely slow, taking 30+ seconds for simple SELECT statements. Disk activity LED is constantly on and system feels unresponsive during database operations." \ - "iostat,iotop,disk,database,slow,performance" - -echo -e "${BLUE}๐Ÿ Integration Tests Complete${NC}" -echo "" -echo "Check individual test logs in: $TEST_DIR" -echo "" -echo -e "${YELLOW}๐Ÿ’ก Tips:${NC}" -echo "- Tests use realistic scenarios that could occur on production systems" -echo "- Each test expects the AI to suggest relevant diagnostic commands" -echo "- Review the full logs to see the complete diagnostic conversation" -echo "- Tests timeout after 120 seconds to prevent hanging" -echo "- Make sure NANNYAPI_ENDPOINT and NANNYAPI_MODEL are set correctly"