Integrate with supabase backend

This commit is contained in:
Harshavardhan Musanalli
2025-10-25 12:39:48 +02:00
parent f69e1dbc66
commit 6fd403cb5f
14 changed files with 1154 additions and 124 deletions

141
scripts/demo_ebpf_integration.sh Executable file
View File

@@ -0,0 +1,141 @@
#!/bin/bash
# Test the eBPF-enhanced NannyAgent
# This script demonstrates the new eBPF integration capabilities.
#
# This first part:
#   1. builds the agent binary in the current directory when it is missing,
#   2. runs the capability check provided by ebpf_helper.sh,
#   3. writes a sample issue description to /tmp/test_ebpf_issue.txt.
set -e

# ebpf_helper.sh is shipped in the same directory as this script, while the
# agent is built from the current directory. Resolving the helper relative to
# the script keeps the demo from aborting (set -e) when it is started from a
# different directory; ./ebpf_helper.sh remains as a fallback so the previous
# way of invoking the demo still works.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
EBPF_HELPER="$SCRIPT_DIR/ebpf_helper.sh"
if [ ! -x "$EBPF_HELPER" ]; then
  EBPF_HELPER="./ebpf_helper.sh"
fi

echo "🔬 Testing eBPF-Enhanced NannyAgent"
echo "=================================="
echo ""

AGENT="./nannyagent-ebpf"
if [ ! -f "$AGENT" ]; then
  echo "Building agent..."
  # Build to the same path that was just tested for, instead of repeating
  # the file name.
  go build -o "$AGENT" .
fi

echo "1. Checking eBPF Capabilities"
echo "-----------------------------"
"$EBPF_HELPER" check
echo ""

echo "2. Testing eBPF Manager Initialization"
echo "-------------------------------------"
echo "Starting agent in test mode..."
echo ""

# Write a predefined issue description that can be used to exercise the eBPF
# path. The delimiter is quoted so the text is stored verbatim.
cat > /tmp/test_ebpf_issue.txt << 'EOF'
Network connection timeouts to external services. Applications report intermittent failures when trying to connect to remote APIs. The issue occurs randomly and affects multiple processes.
EOF

echo "Test Issue: Network connection timeouts"
echo "Expected eBPF Programs: Network tracing, syscall monitoring"
echo ""
# Section 3: static overview of which probes are expected for each issue
# category. Nothing is executed; the quoted delimiter prints the text verbatim.
cat << 'EOF'
3. Demonstration of eBPF Program Suggestions
-------------------------------------------
For NETWORK issues - Expected eBPF programs:
- tracepoint:syscalls/sys_enter_connect (network connections)
- kprobe:tcp_connect (TCP connection attempts)
- kprobe:tcp_sendmsg (network send operations)

For PROCESS issues - Expected eBPF programs:
- tracepoint:syscalls/sys_enter_execve (process execution)
- tracepoint:sched/sched_process_exit (process termination)
- kprobe:do_fork (process creation)

For FILE issues - Expected eBPF programs:
- tracepoint:syscalls/sys_enter_openat (file opens)
- kprobe:vfs_read (file reads)
- kprobe:vfs_write (file writes)

For PERFORMANCE issues - Expected eBPF programs:
- tracepoint:syscalls/sys_enter_* (syscall frequency analysis)
- kprobe:schedule (CPU scheduling events)

EOF
# Section 4: feature checklist, one argument per output line.
printf '%s\n' \
  "4. eBPF Integration Features" \
  "---------------------------" \
  "✓ Cilium eBPF library integration" \
  "✓ bpftrace-based program execution" \
  "✓ Dynamic program generation based on issue type" \
  "✓ Parallel execution with regular diagnostic commands" \
  "✓ Structured JSON event collection" \
  "✓ AI-driven eBPF program selection" \
  ""
# Section 5: heading plus a sample JSON response, printed verbatim (quoted
# delimiter, so nothing inside is expanded), followed by one blank line.
cat << 'EOF'
5. Example AI Response with eBPF
-------------------------------
{
"response_type": "diagnostic",
"reasoning": "Network timeout issues require monitoring TCP connections and system calls to identify bottlenecks",
"commands": [
{"id": "net_status", "command": "ss -tulpn", "description": "Current network connections"},
{"id": "net_config", "command": "ip route show", "description": "Network configuration"}
],
"ebpf_programs": [
{
"name": "tcp_connect_monitor",
"type": "kprobe",
"target": "tcp_connect",
"duration": 15,
"description": "Monitor TCP connection attempts"
},
{
"name": "syscall_network",
"type": "tracepoint",
"target": "syscalls/sys_enter_connect",
"duration": 15,
"filters": {"comm": "curl"},
"description": "Monitor network-related system calls"
}
]
}

EOF
# Section 6: safety claims shown to the user, one argument per output line.
printf '%s\n' \
  "6. Security and Safety" \
  "--------------------" \
  "✓ eBPF programs are read-only and time-limited" \
  "✓ No system modification capabilities" \
  "✓ Automatic cleanup after execution" \
  "✓ Safe execution in containers and restricted environments" \
  "✓ Graceful fallback when eBPF is not available" \
  ""
# Section 7 and closing summary. The delimiter is left unquoted on purpose:
# $AGENT (set earlier in this script) is the only expansion in the text.
cat << EOF
7. Next Steps
------------
To test the full eBPF integration:

a) Run with root privileges for full eBPF access:
 sudo $AGENT

b) Try these test scenarios:
 - 'Network connection timeouts'
 - 'High CPU usage and slow performance'
 - 'File permission errors'
 - 'Process hanging or not responding'

c) Install additional eBPF tools:
 sudo ./ebpf_helper.sh install

🎯 eBPF Integration Complete!

The agent now supports:
- Dynamic eBPF program compilation and execution
- AI-driven selection of appropriate tracepoints and kprobes
- Real-time system event monitoring during diagnosis
- Integration with Cilium eBPF library for professional-grade monitoring

This provides unprecedented visibility into system behavior
for accurate root cause analysis and issue resolution.
EOF

51
scripts/discover-functions.sh Executable file
View File

@@ -0,0 +1,51 @@
#!/bin/bash
# NannyAPI Function Discovery Script
#
# Helps find the function/model name accepted by a NannyAPI deployment.
# Environment:
#   NANNYAPI_ENDPOINT  base URL of the OpenAI-compatible API (optional; the
#                      default below is used when it is unset or empty)

ENDPOINT=${NANNYAPI_ENDPOINT:-http://tensorzero.netcup.internal:3000/openai/v1}

printf '%s\n' \
  "🔍 NannyAPI Function Discovery" \
  "==============================" \
  ""
printf '%s\n' "Testing endpoint: $ENDPOINT/chat/completions" ""

# Candidate identifiers to probe: common function-name patterns first, then
# model-name patterns.
test_functions=()
test_functions+=(
  "nannyapi::function_name::diagnose"
  "nannyapi::function_name::diagnose_and_heal"
  "nannyapi::function_name::linux_diagnostic"
  "nannyapi::function_name::system_diagnostic"
)
test_functions+=(
  "nannyapi::model_name::gpt-4"
  "nannyapi::model_name::claude"
)
# Probe every candidate in test_functions by sending a minimal chat
# completion request to $ENDPOINT and classifying the reply:
#   - transport failure or empty body   -> reported as a request problem
#   - body mentions "Unknown function"  -> candidate does not exist
#   - body mentions "error"             -> some other API error (printed)
#   - anything else                     -> candidate exists
for func in "${test_functions[@]}"; do
  echo "Testing function: $func"

  # A failed transfer (DNS failure, refused connection, timeout) leaves the
  # body empty. Without the explicit checks below an empty body matches
  # neither grep and would be reported as "Function exists". The timeouts keep
  # an unreachable host from hanging the loop.
  if ! response=$(curl -s --connect-timeout 10 --max-time 120 \
    -X POST "$ENDPOINT/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{\"model\":\"$func\",\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}"); then
    echo " ⚠️ Request failed: could not get a response from $ENDPOINT"
  elif [[ -z "$response" ]]; then
    echo " ⚠️ Empty response from $ENDPOINT"
  elif grep -q "Unknown function" <<< "$response"; then
    echo " ❌ Function not found"
  elif grep -q "error" <<< "$response"; then
    # Prefer the structured error field; fall back to the raw body when jq
    # is missing or the body is not JSON.
    echo " ⚠️ Error: $(jq -r '.error' <<< "$response" 2>/dev/null || echo "$response")"
  else
    echo " ✅ Function exists and responding!"
    echo " Use this in your environment: export NANNYAPI_MODEL=\"$func\""
  fi
  echo ""
done
# Closing hints with an example config snippet.
# The snippet contains Markdown code fences. Inside double quotes a backtick
# starts a command substitution, so the fences must not be passed through
# double-quoted echo; a quoted here-document prints every character literally.
cat << 'EOF'
💡 If none of the above work, check your NannyAPI configuration file
 for the correct function names and update NANNYAPI_MODEL accordingly.

Example NannyAPI config snippet:
```yaml
functions:
 diagnose_and_heal: # This becomes 'nannyapi::function_name::diagnose_and_heal'
 # function definition
```
EOF

296
scripts/ebpf_helper.sh Executable file
View File

@@ -0,0 +1,296 @@
#!/bin/bash
# eBPF Helper Scripts for NannyAgent
# This script contains various eBPF programs and helpers for system monitoring
# Print a two-line advisory on stdout when the effective user is not root.
# Purely informational: nothing is aborted and the function returns 0.
check_root() {
  if [ "$EUID" -eq 0 ]; then
    return 0
  fi

  printf '%s\n' \
    "Warning: Many eBPF operations require root privileges" \
    "Consider running with sudo for full functionality"
}
# Install bpftrace, perf and (where packaged) the BCC tools using whichever
# supported package manager is found first: apt-get, dnf, yum, zypper.
# Individual install failures are tolerated (|| true) so that one missing
# package does not prevent the remaining ones from being attempted.
# Takes no arguments; does not escalate privileges itself.
install_ebpf_tools() {
  echo "Installing eBPF tools..."

  # Detect package manager and install appropriate packages
  if command -v apt-get >/dev/null 2>&1; then
    # Ubuntu/Debian
    echo "Detected Ubuntu/Debian system"
    apt-get update
    apt-get install -y bpftrace linux-tools-generic "linux-tools-$(uname -r)" || true
    # Debian and Ubuntu package BCC as bpfcc-tools / python3-bpfcc.
    apt-get install -y bpfcc-tools python3-bpfcc || true
  elif command -v dnf >/dev/null 2>&1; then
    # RHEL/CentOS 8+/Fedora. Checked before yum because these systems also
    # provide a yum command, which would otherwise hide this branch.
    echo "Detected Fedora/RHEL 8+ system"
    dnf install -y bpftrace perf bcc-tools python3-bcc || true
  elif command -v yum >/dev/null 2>&1; then
    # RHEL/CentOS 7
    echo "Detected RHEL/CentOS system"
    yum install -y bpftrace perf || true
  elif command -v zypper >/dev/null 2>&1; then
    # openSUSE
    echo "Detected openSUSE system"
    zypper install -y bpftrace perf || true
  else
    echo "Unknown package manager. Please install eBPF tools manually:"
    echo "- bpftrace"
    echo "- perf (linux-tools)"
    echo "- BCC tools (optional)"
  fi
}
# Report what eBPF support the current system offers. Prints to stdout:
#   - the running kernel version,
#   - whether CONFIG_BPF=y appears in /proc/config.gz or /boot/config-<ver>,
#   - one "✓ <tool>" / "✗ <tool>" line per known eBPF tool,
#   - whether debugfs is mounted,
#   - whether a trivial bpftrace program can be loaded.
# Takes no arguments and changes nothing on the system.
check_ebpf_capabilities() {
  local kernel_version tool
  local -a tools=("bpftrace" "perf" "execsnoop" "opensnoop" "tcpconnect" "biotop")

  echo "Checking eBPF capabilities..."

  # Check kernel version
  kernel_version=$(uname -r)
  echo "Kernel version: $kernel_version"

  # Check if eBPF is enabled in kernel
  if [ -f /proc/config.gz ]; then
    if zcat /proc/config.gz | grep -q "CONFIG_BPF=y"; then
      echo "✓ eBPF support enabled in kernel"
    else
      echo "✗ eBPF support not found in kernel config"
    fi
  elif [ -f "/boot/config-$kernel_version" ]; then
    if grep -q "CONFIG_BPF=y" "/boot/config-$kernel_version"; then
      echo "✓ eBPF support enabled in kernel"
    else
      echo "✗ eBPF support not found in kernel config"
    fi
  else
    echo "? Unable to check kernel eBPF config"
  fi

  # Check available tools. Each line carries the same ✓/✗ marker used by the
  # other checks so installed and missing tools can be told apart.
  echo ""
  echo "Available eBPF tools:"
  for tool in "${tools[@]}"; do
    if command -v "$tool" >/dev/null 2>&1; then
      echo "✓ $tool"
    else
      echo "✗ $tool"
    fi
  done

  # Check debugfs mount
  if mount | grep -q debugfs; then
    echo "✓ debugfs mounted"
  else
    echo "✗ debugfs not mounted (required for ftrace)"
    echo " To mount: sudo mount -t debugfs none /sys/kernel/debug"
  fi

  # Check if we can load eBPF programs
  echo ""
  echo "Testing eBPF program loading..."
  if bpftrace -e 'BEGIN { print("eBPF test successful"); exit(); }' >/dev/null 2>&1; then
    echo "✓ eBPF program loading works"
  else
    echo "✗ eBPF program loading failed (may need root privileges)"
  fi
}
# Generate /tmp/nannyagent_syscall_monitor.bt, a bpftrace program that prints
# one JSON-style record per syscall entry tracepoint, mark it executable and
# announce its location. The here-document is quoted, so it is written as-is.
create_syscall_monitor() {
  local script_path=/tmp/nannyagent_syscall_monitor.bt

  cat << 'EOF' > "$script_path"
#!/usr/bin/env bpftrace
BEGIN {
printf("Monitoring syscalls... Press Ctrl-C to stop\n");
printf("[\n");
}
tracepoint:syscalls:sys_enter_* {
printf("{\"timestamp\":%llu,\"event_type\":\"syscall_enter\",\"process_id\":%d,\"process_name\":\"%s\",\"syscall\":\"%s\",\"user_id\":%d},\n",
nsecs, pid, comm, probe, uid);
}
END {
printf("]\n");
}
EOF

  chmod +x "$script_path"
  echo "Syscall monitor created: $script_path"
}
# Generate /tmp/nannyagent_network_monitor.bt, a bpftrace program that prints
# one JSON-style record for every TCP/UDP send or receive kernel call, mark it
# executable and announce its location.
#
# Each kprobe has its own action block with the direction and protocol written
# out literally. bpftrace offers no regex-match operator on the `probe`
# builtin, so deriving them with `probe =~ /.../` is not valid bpftrace; the
# emitted records are the same as that approach intended.
create_network_monitor() {
  cat > /tmp/nannyagent_network_monitor.bt << 'EOF'
#!/usr/bin/env bpftrace
BEGIN {
  printf("Monitoring network activity... Press Ctrl-C to stop\n");
  printf("[\n");
}
kprobe:tcp_sendmsg {
  printf("{\"timestamp\":%llu,\"event_type\":\"network_send\",\"protocol\":\"tcp\",\"process_id\":%d,\"process_name\":\"%s\"},\n",
    nsecs, pid, comm);
}
kprobe:tcp_recvmsg {
  printf("{\"timestamp\":%llu,\"event_type\":\"network_recv\",\"protocol\":\"tcp\",\"process_id\":%d,\"process_name\":\"%s\"},\n",
    nsecs, pid, comm);
}
kprobe:udp_sendmsg {
  printf("{\"timestamp\":%llu,\"event_type\":\"network_send\",\"protocol\":\"udp\",\"process_id\":%d,\"process_name\":\"%s\"},\n",
    nsecs, pid, comm);
}
kprobe:udp_recvmsg {
  printf("{\"timestamp\":%llu,\"event_type\":\"network_recv\",\"protocol\":\"udp\",\"process_id\":%d,\"process_name\":\"%s\"},\n",
    nsecs, pid, comm);
}
END {
  printf("]\n");
}
EOF
  chmod +x /tmp/nannyagent_network_monitor.bt
  echo "Network monitor created: /tmp/nannyagent_network_monitor.bt"
}
# Generate /tmp/nannyagent_file_monitor.bt, a bpftrace program that prints a
# JSON-style record for every openat and unlinkat syscall entry, mark it
# executable and announce its location. The here-document is quoted, so it is
# written as-is.
create_file_monitor() {
  local script_path=/tmp/nannyagent_file_monitor.bt

  cat << 'EOF' > "$script_path"
#!/usr/bin/env bpftrace
BEGIN {
printf("Monitoring file access... Press Ctrl-C to stop\n");
printf("[\n");
}
tracepoint:syscalls:sys_enter_openat {
printf("{\"timestamp\":%llu,\"event_type\":\"file_open\",\"process_id\":%d,\"process_name\":\"%s\",\"filename\":\"%s\",\"flags\":%d},\n",
nsecs, pid, comm, str(args->pathname), args->flags);
}
tracepoint:syscalls:sys_enter_unlinkat {
printf("{\"timestamp\":%llu,\"event_type\":\"file_delete\",\"process_id\":%d,\"process_name\":\"%s\",\"filename\":\"%s\"},\n",
nsecs, pid, comm, str(args->pathname));
}
END {
printf("]\n");
}
EOF

  chmod +x "$script_path"
  echo "File monitor created: $script_path"
}
# Generate /tmp/nannyagent_process_monitor.bt, a bpftrace program that prints
# a JSON-style record for every execve syscall entry and every
# sched_process_exit event, mark it executable and announce its location.
# The here-document is quoted, so it is written as-is.
# NOTE(review): the exit probe reads args->code; confirm that the
# sched_process_exit tracepoint exposes a `code` field on the target kernels.
create_process_monitor() {
  local script_path=/tmp/nannyagent_process_monitor.bt

  cat << 'EOF' > "$script_path"
#!/usr/bin/env bpftrace
BEGIN {
printf("Monitoring process activity... Press Ctrl-C to stop\n");
printf("[\n");
}
tracepoint:syscalls:sys_enter_execve {
printf("{\"timestamp\":%llu,\"event_type\":\"process_exec\",\"process_id\":%d,\"process_name\":\"%s\",\"filename\":\"%s\"},\n",
nsecs, pid, comm, str(args->filename));
}
tracepoint:sched:sched_process_exit {
printf("{\"timestamp\":%llu,\"event_type\":\"process_exit\",\"process_id\":%d,\"process_name\":\"%s\",\"exit_code\":%d},\n",
nsecs, args->pid, args->comm, args->code);
}
END {
printf("]\n");
}
EOF

  chmod +x "$script_path"
  echo "Process monitor created: $script_path"
}
# Generate /tmp/nannyagent_perf_monitor.sh and mark it executable.
# The generated script takes an optional duration in seconds (default 10) and
# an optional output file (default /tmp/nannyagent_perf_output.json); once per
# second it appends a record with CPU, memory and load-average readings taken
# from top, free and uptime. The here-document is quoted, so every $ in it is
# evaluated when the generated script runs, not here.
setup_performance_monitoring() {
  local script_path=/tmp/nannyagent_perf_monitor.sh

  echo "Setting up performance monitoring..."

  cat << 'EOF' > "$script_path"
#!/bin/bash
DURATION=${1:-10}
OUTPUT_FILE=${2:-/tmp/nannyagent_perf_output.json}
echo "Running performance monitoring for $DURATION seconds..."
echo "[" > "$OUTPUT_FILE"
# Sample system performance every second
for i in $(seq 1 $DURATION); do
timestamp=$(date +%s)000000000
cpu_percent=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1)
memory_percent=$(free | grep Mem | awk '{printf "%.1f", $3/$2 * 100.0}')
load_avg=$(uptime | awk -F'load average:' '{print $2}' | xargs)
echo "{\"timestamp\":$timestamp,\"event_type\":\"performance_sample\",\"cpu_percent\":\"$cpu_percent\",\"memory_percent\":\"$memory_percent\",\"load_avg\":\"$load_avg\"}," >> "$OUTPUT_FILE"
[ $i -lt $DURATION ] && sleep 1
done
echo "]" >> "$OUTPUT_FILE"
echo "Performance data saved to $OUTPUT_FILE"
EOF

  chmod +x "$script_path"
  echo "Performance monitor created: $script_path"
}
# Entry point: print the root advisory, then dispatch on the first argument.
#   install  install eBPF tooling through the system package manager
#   check    report eBPF capabilities
#   setup    generate all monitoring scripts under /tmp/
#   test     capability report plus a 5-second bpftrace smoke test
# Any other value, including no argument or "help", prints the usage text.
main() {
  local command_name=${1:-help}

  check_root

  case "$command_name" in
    install)
      install_ebpf_tools
      ;;
    check)
      check_ebpf_capabilities
      ;;
    setup)
      echo "Setting up eBPF monitoring scripts..."
      create_syscall_monitor
      create_network_monitor
      create_file_monitor
      create_process_monitor
      setup_performance_monitoring
      echo "All eBPF monitoring scripts created in /tmp/"
      ;;
    test)
      echo "Testing eBPF functionality..."
      check_ebpf_capabilities
      # The smoke test only runs when bpftrace is installed.
      if command -v bpftrace >/dev/null 2>&1; then
        echo "Running quick eBPF test..."
        timeout 5s bpftrace -e 'BEGIN { print("eBPF is working!"); } tracepoint:syscalls:sys_enter_openat { @[comm] = count(); } END { print(@); clear(@); }'
      fi
      ;;
    *)
      # Unquoted delimiter: $0 is expanded to the name this script was
      # invoked with.
      cat << EOF
eBPF Helper Script for NannyAgent

Usage: $0 [command]

Commands:
 install - Install eBPF tools on the system
 check - Check eBPF capabilities
 setup - Create eBPF monitoring scripts
 test - Test eBPF functionality
 help - Show this help message

Examples:
 $0 check # Check what eBPF tools are available
 $0 install # Install eBPF tools (requires root)
 $0 setup # Create monitoring scripts
 $0 test # Test eBPF functionality
EOF
      ;;
  esac
}

# Hand every command-line argument to main unchanged.
main "$@"

85
scripts/install.sh Executable file
View File

@@ -0,0 +1,85 @@
#!/bin/bash
# Linux Diagnostic Agent Installation Script
#
# Installs the nanny-agent on a Linux system. This first part verifies that
# the Go toolchain is available and builds the binary in the current
# directory; set -e stops the script as soon as a build step fails.
set -e

printf '%s\n' \
  "🔧 Linux Diagnostic Agent Installation Script" \
  "=============================================="

# The build cannot proceed without Go: print install hints and stop.
if ! command -v go >/dev/null 2>&1; then
  cat << 'EOF'
❌ Go is not installed. Please install Go first:

For Ubuntu/Debian:
 sudo apt update && sudo apt install golang-go

For RHEL/CentOS/Fedora:
 sudo dnf install golang
 # or
 sudo yum install golang

EOF
  exit 1
fi
printf '%s\n' "✅ Go is installed: $(go version)"

# Build the application
printf '%s\n' "🔨 Building the application..."
go mod tidy
make build

# The build is only considered successful when the binary actually exists.
[ -f "./nanny-agent" ] || {
  printf '%s\n' "❌ Build failed! nanny-agent binary not found."
  exit 1
}
printf '%s\n' "✅ Build successful!"
# Ask where the freshly built binary should live and act on the answer.
#   1          copy it to /usr/local/bin (uses sudo)
#   2          leave it in the current directory
#   otherwise  treated as invalid; the binary stays in the current directory
echo ""
echo "Installation options:"
echo "1. Install system-wide (/usr/local/bin) - requires sudo"
echo "2. Keep in current directory"
echo ""

# read returns non-zero at end of input (for example when stdin is not a
# terminal). With set -e active that would end the script right here, before
# the configuration hints are shown, so the failure is tolerated and an empty
# answer falls through to the "invalid choice" branch. -r keeps backslashes
# in the answer literal.
choice=""
read -r -p "Choose option (1 or 2): " choice || true

case "$choice" in
  1)
    echo "📦 Installing system-wide..."
    sudo cp nanny-agent /usr/local/bin/
    sudo chmod +x /usr/local/bin/nanny-agent
    echo "✅ Agent installed to /usr/local/bin/nanny-agent"
    echo ""
    echo "You can now run the agent from anywhere with:"
    echo " nanny-agent"
    ;;
  2)
    echo "✅ Agent ready in current directory"
    echo ""
    echo "Run the agent with:"
    echo " ./nanny-agent"
    ;;
  *)
    echo "❌ Invalid choice. Agent is available in current directory."
    echo "Run with: ./nanny-agent"
    ;;
esac
# Configuration hints and a usage example, printed verbatim (quoted
# delimiter, so nothing inside is expanded).
cat << 'EOF'

📝 Configuration:
Set these environment variables to configure the agent:

export NANNYAPI_ENDPOINT="http://your-nannyapi-host:3000/openai/v1"
export NANNYAPI_MODEL="your-model-identifier"

Or create a .env file in the working directory.

🎉 Installation complete!

Example usage:
 ./nanny-agent
 > On /var filesystem I cannot create any file but df -h shows 30% free space available.
EOF

116
scripts/integration-tests.sh Executable file
View File

@@ -0,0 +1,116 @@
#!/bin/bash
# Linux Diagnostic Agent - Integration Tests
#
# Feeds realistic Linux problem descriptions to the agent binary and looks
# for expected keywords in what it prints. This preamble defines the shared
# settings, creates the log directory and refuses to continue without a
# built agent.
set -e

# ANSI colour sequences; expanded by echo -e / printf %b at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Locations used by every scenario.
AGENT_BINARY="./nanny-agent"
TEST_DIR="/tmp/nanny-agent-tests"
TEST_LOG="$TEST_DIR/integration_test.log"

# Ensure test directory exists
mkdir -p "$TEST_DIR"

printf '%b\n' "${BLUE}🧪 Linux Diagnostic Agent - Integration Tests${NC}"
printf '%s\n' "=================================================" ""

# Nothing can be tested without the agent binary.
[[ -f "$AGENT_BINARY" ]] || {
  printf '%b\n' "${RED}❌ Agent binary not found at $AGENT_BINARY${NC}"
  printf '%s\n' "Please run: make build"
  exit 1
}
# Run one scenario against the agent and report which expected keywords
# appear in its output.
# Globals:   AGENT_BINARY (read), TEST_LOG (read; file is overwritten),
#            RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - display name of the test
#            $2 - problem description fed to the agent on stdin
#            $3 - comma-separated keywords; the test passes when at least
#                 one of them occurs (case-insensitively) in the output
# Outputs:   progress and verdict on stdout
# Returns:   0 regardless of the verdict
run_test() {
  local test_name="$1"
  local scenario="$2"
  local expected_keywords="$3"
  local -a keywords=()
  local keyword
  local found_keywords=0

  echo -e "${YELLOW}📋 Test: $test_name${NC}"
  echo "Scenario: $scenario"
  echo ""

  # Run the agent with the scenario. A non-zero exit or the 120 s timeout is
  # tolerated on purpose: the verdict is based on the captured output only.
  echo "$scenario" | timeout 120s "$AGENT_BINARY" > "$TEST_LOG" 2>&1 || true

  # Check if any expected keywords are found in the output
  IFS=',' read -ra keywords <<< "$expected_keywords"
  for keyword in "${keywords[@]}"; do
    # Trim surrounding whitespace with parameter expansion (no subprocess,
    # and quotes inside a keyword are left alone).
    keyword="${keyword#"${keyword%%[![:space:]]*}"}"
    keyword="${keyword%"${keyword##*[![:space:]]}"}"
    # An empty keyword would match every log and fake a pass.
    [[ -n "$keyword" ]] || continue

    # -F: keywords are literal text ("resolv.conf", "df -i"), not regexes;
    # --: a keyword starting with "-" is not taken as a grep option.
    if grep -qiF -- "$keyword" "$TEST_LOG"; then
      echo -e "${GREEN} ✅ Found expected keyword: $keyword${NC}"
      # Not ((found_keywords++)): that expression evaluates to 0 on the
      # first hit, returns status 1 and would abort the script under set -e.
      found_keywords=$((found_keywords + 1))
    else
      echo -e "${RED} ❌ Missing keyword: $keyword${NC}"
    fi
  done

  # Show summary
  if [[ $found_keywords -gt 0 ]]; then
    echo -e "${GREEN} ✅ Test PASSED ($found_keywords keywords found)${NC}"
  else
    echo -e "${RED} ❌ Test FAILED (no expected keywords found)${NC}"
  fi
  echo ""
  echo "Full output saved to: $TEST_LOG"
  echo "----------------------------------------"
  echo ""
}
# Scenario table: three parallel arrays (title, problem report, expected
# keywords) that are handed to run_test in order.
scenario_titles=()
scenario_reports=()
scenario_keywords=()

# Test Scenario 1: Disk Space Issues (Inode Exhaustion)
scenario_titles+=("Disk Space - Inode Exhaustion")
scenario_reports+=("I cannot create new files in /home directory even though df -h shows plenty of space available. Getting 'No space left on device' error when trying to touch new files.")
scenario_keywords+=("inode,df -i,filesystem,inodes,exhausted")

# Test Scenario 2: Memory Issues
scenario_titles+=("Memory Issues - OOM Killer")
scenario_reports+=("My applications keep getting killed randomly and I see 'killed' messages in logs. The system becomes unresponsive for a few seconds before recovering. This happens especially when running memory-intensive tasks.")
scenario_keywords+=("memory,oom,killed,dmesg,free,swap")

# Test Scenario 3: Network Connectivity Issues
scenario_titles+=("Network Connectivity - DNS Resolution")
scenario_reports+=("I can ping IP addresses directly (like 8.8.8.8) but cannot resolve domain names. Web browsing fails with DNS resolution errors, but ping 8.8.8.8 works fine.")
scenario_keywords+=("dns,resolv.conf,nslookup,nameserver,dig")

# Test Scenario 4: Service/Process Issues
scenario_titles+=("Service Issues - High Load")
scenario_reports+=("System load average is consistently above 10.0 even when CPU usage appears normal. Applications are responding slowly and I notice high wait times. The server feels sluggish overall.")
scenario_keywords+=("load,average,cpu,iostat,vmstat,processes")

# Test Scenario 5: File System Issues
scenario_titles+=("Filesystem Issues - Permission Problems")
scenario_reports+=("Web server returns 403 Forbidden errors for all pages. Files exist and seem readable, but nginx logs show permission denied errors. SELinux is disabled and file permissions look correct.")
scenario_keywords+=("permission,403,nginx,chmod,chown,selinux")

# Test Scenario 6: Boot/System Issues
scenario_titles+=("Boot Issues - Kernel Module")
scenario_reports+=("System boots but some hardware devices are not working. Network interface shows as down, USB devices are not recognized, and dmesg shows module loading failures.")
scenario_keywords+=("module,lsmod,dmesg,hardware,interface,usb")

# Test Scenario 7: Performance Issues
scenario_titles+=("Performance Issues - I/O Bottleneck")
scenario_reports+=("Database queries are extremely slow, taking 30+ seconds for simple SELECT statements. Disk activity LED is constantly on and system feels unresponsive during database operations.")
scenario_keywords+=("iostat,iotop,disk,database,slow,performance")

for scenario_index in "${!scenario_titles[@]}"; do
  run_test \
    "${scenario_titles[$scenario_index]}" \
    "${scenario_reports[$scenario_index]}" \
    "${scenario_keywords[$scenario_index]}"
done
# Closing banner and usage tips. Only the two coloured lines need escape
# processing (%b); everything else is printed as plain text.
printf '%b\n' "${BLUE}🏁 Integration Tests Complete${NC}"
printf '%s\n' "" "Check individual test logs in: $TEST_DIR" ""
printf '%b\n' "${YELLOW}💡 Tips:${NC}"
printf '%s\n' \
  "- Tests use realistic scenarios that could occur on production systems" \
  "- Each test expects the AI to suggest relevant diagnostic commands" \
  "- Review the full logs to see the complete diagnostic conversation" \
  "- Tests timeout after 120 seconds to prevent hanging" \
  "- Make sure NANNYAPI_ENDPOINT and NANNYAPI_MODEL are set correctly"