Files
nannyagent/main.go
Harshavardhan Musanalli c268a3a42e Somewhat okay refactoring
2025-11-08 21:48:59 +01:00

238 lines
7.0 KiB
Go

package main
import (
	"bufio"
	"fmt"
	"log"
	"os"
	"os/exec"
	"runtime"
	"strconv"
	"strings"
	"syscall"
	"time"

	"nannyagentv2/internal/auth"
	"nannyagentv2/internal/config"
	"nannyagentv2/internal/logging"
	"nannyagentv2/internal/metrics"
	"nannyagentv2/internal/types"
	"nannyagentv2/internal/websocket"
)
// Version is the agent's release identifier. main logs it at startup and
// passes it to metrics.NewCollector.
const Version = "v2.0.0"
// checkRootPrivileges returns normally only when the effective user ID is 0.
// For any other user it logs why root is needed for eBPF and how to re-run
// the program, then terminates the process with exit status 1.
func checkRootPrivileges() {
	if euid := os.Geteuid(); euid == 0 {
		return
	}

	logging.Error("This program must be run as root for eBPF functionality")
	// os.Args[0] is echoed so the hint matches however the binary was invoked.
	logging.Error("Please run with: sudo %s", os.Args[0])
	logging.Error("Reason: eBPF programs require root privileges to:\n - Load programs into the kernel\n - Attach to kernel functions and tracepoints\n - Access kernel memory maps")
	os.Exit(1)
}
// checkKernelVersionCompatibility verifies that the running kernel is Linux
// 4.4 or newer. The release string is read from `uname -r`; if it cannot be
// obtained or parsed, or if the kernel is older than 4.4, the problem is
// logged and the process terminates with exit status 1.
func checkKernelVersionCompatibility() {
	raw, unameErr := exec.Command("uname", "-r").Output()
	if unameErr != nil {
		logging.Error("Cannot determine kernel version: %v", unameErr)
		os.Exit(1)
	}
	release := strings.TrimSpace(string(raw))

	// Only the first two dot-separated fields are needed, e.g.
	// "5.15.0-56-generic" yields major "5" and minor "15"; whatever follows
	// the second dot is left unsplit in the third field and ignored.
	fields := strings.SplitN(release, ".", 3)
	if len(fields) < 2 {
		logging.Error("Cannot parse kernel version: %s", release)
		os.Exit(1)
	}
	majorText, minorText := fields[0], fields[1]

	major, majorErr := strconv.Atoi(majorText)
	if majorErr != nil {
		logging.Error("Cannot parse major kernel version: %s", majorText)
		os.Exit(1)
	}

	minor, minorErr := strconv.Atoi(minorText)
	if minorErr != nil {
		logging.Error("Cannot parse minor kernel version: %s", minorText)
		os.Exit(1)
	}

	// Anything at or above 4.4 is accepted.
	if major > 4 || (major == 4 && minor >= 4) {
		return
	}

	logging.Error("Kernel version %s is too old for eBPF", release)
	logging.Error("Required: Linux kernel 4.4 or higher")
	logging.Error("Current: %s", release)
	logging.Error("Reason: eBPF requires kernel features introduced in 4.4+:\n - BPF system call support\n - eBPF program types (kprobe, tracepoint)\n - BPF maps and helper functions")
	os.Exit(1)
}
// bpfSyscallNumber returns the bpf(2) system call number for the given GOARCH
// value. The second result is false when the architecture is not in the
// table, in which case the first result must not be used.
func bpfSyscallNumber(goarch string) (uintptr, bool) {
	switch goarch {
	case "amd64":
		return 321, true
	case "386":
		return 357, true
	case "arm":
		return 386, true
	case "arm64", "riscv64", "loong64":
		// These architectures share the generic Linux syscall table.
		return 280, true
	case "ppc64", "ppc64le":
		return 361, true
	case "s390x":
		return 351, true
	}
	return 0, false
}

// checkEBPFSupport validates eBPF subsystem availability.
//
// It warns (without failing) when /sys/kernel/debug/tracing does not exist,
// then probes the bpf system call with all-zero arguments. A result of success
// or EINVAL is taken to mean the syscall is present; any other errno is logged
// and terminates the process with exit status 1. On architectures whose bpf
// syscall number is not known the probe is skipped with a warning, because
// issuing a guessed syscall number could invoke an unrelated system call.
func checkEBPFSupport() {
	// Check if /sys/kernel/debug/tracing exists (debugfs mounted)
	if _, err := os.Stat("/sys/kernel/debug/tracing"); os.IsNotExist(err) {
		logging.Warning("debugfs not mounted. Some eBPF features may not work")
		logging.Info("To fix: sudo mount -t debugfs debugfs /sys/kernel/debug")
	}

	// The syscall number differs per architecture, so it is looked up rather
	// than hard-coded to the x86_64 value.
	sysno, known := bpfSyscallNumber(runtime.GOARCH)
	if !known {
		logging.Warning("Skipping BPF syscall probe: syscall number unknown for architecture %s", runtime.GOARCH)
		return
	}

	// Check if we can access the BPF syscall. All-zero arguments are expected
	// to be rejected with EINVAL once the kernel has recognised the call.
	fd, _, errno := syscall.Syscall(sysno, 0, 0, 0)
	if errno != 0 && errno != syscall.EINVAL {
		logging.Error("BPF syscall not available (errno: %v)", errno)
		logging.Error("This may indicate:\n - Kernel compiled without BPF support\n - BPF syscall disabled in kernel config")
		os.Exit(1)
	}

	// Only a successful call yields a real descriptor. On failure the raw
	// return value is -1 (a huge uintptr), which must not be passed to Close.
	if errno == 0 && fd > 0 {
		// Best-effort cleanup of the probe descriptor; a close error is not actionable here.
		_ = syscall.Close(int(fd))
	}
}
// runInteractiveDiagnostics runs the prompt loop on standard input. Each
// non-empty line is trimmed and handed to agent.DiagnoseIssue; a failed
// diagnosis is logged and the loop carries on. The loop ends when the user
// types "quit" or "exit" or when input can no longer be read. A scanner
// error terminates the process via log.Fatal; otherwise a farewell is logged.
func runInteractiveDiagnostics(agent *LinuxDiagnosticAgent) {
	logging.Info("=== Linux eBPF-Enhanced Diagnostic Agent ===")
	logging.Info("Linux Diagnostic Agent Started")
	logging.Info("Enter a system issue description (or 'quit' to exit):")

	stdin := bufio.NewScanner(os.Stdin)

prompt:
	for {
		fmt.Print("> ")
		if !stdin.Scan() {
			break
		}

		switch issue := strings.TrimSpace(stdin.Text()); issue {
		case "":
			// Blank line: just show the prompt again.
			continue
		case "quit", "exit":
			break prompt
		default:
			// Process the issue with AI capabilities via TensorZero
			if diagErr := agent.DiagnoseIssue(issue); diagErr != nil {
				logging.Error("Diagnosis failed: %v", diagErr)
			}
		}
	}

	// Scan returning false covers both end of input and a read failure; only
	// the latter leaves a non-nil error behind.
	if scanErr := stdin.Err(); scanErr != nil {
		log.Fatal(scanErr)
	}
	logging.Info("Goodbye!")
}
// main is the agent entry point. In order, it:
//
//  1. runs the root, kernel-version and eBPF checks (each exits the process
//     on failure),
//  2. loads and prints the configuration and authenticates,
//  3. starts the WebSocket client in a goroutine,
//  4. starts the metrics/heartbeat loop in a goroutine, and
//  5. runs the interactive diagnostic session on the calling goroutine.
//
// The background goroutines are not waited for: when the interactive session
// returns, main returns and the process ends.
func main() {
	// Used when the configured metrics interval is zero or negative.
	const fallbackHeartbeatInterval = 30 * time.Second

	// Version already carries its own "v" prefix, so it is printed verbatim.
	logging.Info("NannyAgent %s starting...", Version)

	// Perform system compatibility checks first
	logging.Info("Performing system compatibility checks...")
	checkRootPrivileges()
	checkKernelVersionCompatibility()
	checkEBPFSupport()
	logging.Info("All system checks passed")

	// Load configuration
	cfg, err := config.LoadConfig()
	if err != nil {
		log.Fatalf("❌ Failed to load configuration: %v", err)
	}
	cfg.PrintConfig()

	// Initialize components
	authManager := auth.NewAuthManager(cfg)
	metricsCollector := metrics.NewCollector(Version)

	// Ensure authentication
	token, err := authManager.EnsureAuthenticated()
	if err != nil {
		log.Fatalf("❌ Authentication failed: %v", err)
	}
	logging.Info("Authentication successful!")

	// Initialize the diagnostic agent for interactive CLI use with authentication
	agent := NewLinuxDiagnosticAgentWithAuth(authManager)

	// Initialize a separate agent for WebSocket investigations using the application model
	applicationAgent := NewLinuxDiagnosticAgent()
	applicationAgent.model = "tensorzero::function_name::diagnose_and_heal_application"

	// Start WebSocket client for backend communications and investigations
	wsClient := websocket.NewWebSocketClient(applicationAgent, authManager)
	go func() {
		if err := wsClient.Start(); err != nil {
			logging.Error("WebSocket client error: %v", err)
		}
	}()

	// Start background metrics collection in a goroutine. After this point
	// token is read and reassigned only inside this goroutine.
	go func() {
		logging.Debug("Starting background metrics collection and heartbeat...")

		// time.NewTicker panics on a non-positive duration, which would take
		// the whole process down from this goroutine; fall back instead.
		interval := time.Duration(cfg.MetricsInterval) * time.Second
		if interval <= 0 {
			logging.Warning("Invalid metrics interval %v, falling back to %v", interval, fallbackHeartbeatInterval)
			interval = fallbackHeartbeatInterval
		}
		ticker := time.NewTicker(interval)
		defer ticker.Stop()

		// Send initial heartbeat
		if err := sendHeartbeat(cfg, token, metricsCollector); err != nil {
			logging.Warning("Initial heartbeat failed: %v", err)
		}

		// Main heartbeat loop
		for range ticker.C {
			// Check if token needs refresh
			if authManager.IsTokenExpired(token) {
				logging.Debug("Token expiring soon, refreshing...")
				newToken, refreshErr := authManager.EnsureAuthenticated()
				if refreshErr != nil {
					// Skip this tick; the refresh is attempted again on the next one.
					logging.Warning("Token refresh failed: %v", refreshErr)
					continue
				}
				token = newToken
				logging.Debug("Token refreshed successfully")
			}

			// Send heartbeat
			if err := sendHeartbeat(cfg, token, metricsCollector); err != nil {
				logging.Warning("Heartbeat failed: %v", err)

				// If unauthorized, try to refresh token.
				// NOTE(review): this matches the exact error text, so it only
				// fires if the send path returns an unwrapped "unauthorized"
				// error — confirm against the metrics package.
				if err.Error() == "unauthorized" {
					logging.Debug("Unauthorized, attempting token refresh...")
					newToken, refreshErr := authManager.EnsureAuthenticated()
					if refreshErr != nil {
						logging.Warning("Token refresh failed: %v", refreshErr)
						continue
					}
					token = newToken

					// Retry heartbeat with new token (silently)
					if retryErr := sendHeartbeat(cfg, token, metricsCollector); retryErr != nil {
						logging.Warning("Retry heartbeat failed: %v", retryErr)
					}
				}
			}
			// No logging for successful heartbeats - they should be silent
		}
	}()

	// Start the interactive diagnostic session (blocking)
	runInteractiveDiagnostics(agent)
}
// sendHeartbeat gathers a fresh set of system metrics from collector and
// submits them to cfg.AgentAuthURL using the access token and agent ID held
// in token. A gathering failure is returned wrapped with context; the result
// of the send is returned unchanged.
func sendHeartbeat(cfg *config.Config, token *types.AuthToken, collector *metrics.Collector) error {
	snapshot, gatherErr := collector.GatherSystemMetrics()
	if gatherErr != nil {
		return fmt.Errorf("failed to gather system metrics: %w", gatherErr)
	}

	// The send error is deliberately passed through unwrapped: main compares
	// its text to decide whether a token refresh is needed.
	return collector.SendMetrics(
		cfg.AgentAuthURL,
		token.AccessToken,
		token.AgentID,
		snapshot,
	)
}