package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/sashabaranov/go-openai"

	"nannyagentv2/internal/ebpf"
	"nannyagentv2/internal/executor"
	"nannyagentv2/internal/logging"
	"nannyagentv2/internal/system"
	"nannyagentv2/internal/types"
)

// AgentConfig holds configuration for concurrent execution (local to agent).
type AgentConfig struct {
	MaxConcurrentTasks int  `json:"max_concurrent_tasks"`
	CollectiveResults  bool `json:"collective_results"`
}

// DefaultAgentConfig returns the default agent configuration:
// up to 10 concurrent forks, with results sent collectively when all finish.
func DefaultAgentConfig() *AgentConfig {
	return &AgentConfig{
		MaxConcurrentTasks: 10,
		CollectiveResults:  true,
	}
}

// LinuxDiagnosticAgent represents the main diagnostic agent. It drives a
// diagnose/heal conversation with a TensorZero function (reached through a
// Supabase proxy), executes the shell commands and eBPF traces the model
// requests, and feeds the results back into the conversation.
type LinuxDiagnosticAgent struct {
	client      *openai.Client            // unused: requests go over direct HTTP to the Supabase proxy, not the OpenAI SDK
	model       string                    // TensorZero model/function name (NANNYAPI_MODEL)
	executor    *executor.CommandExecutor // runs diagnostic shell commands with a per-command timeout
	episodeID   string                    // TensorZero episode ID for conversation continuity
	ebpfManager *ebpf.BCCTraceManager     // eBPF tracing manager
	config      *AgentConfig              // configuration for concurrent execution
	authManager interface{}               // authentication manager for TensorZero requests (expected to provide LoadToken)
	logger      *logging.Logger
}

// NewLinuxDiagnosticAgent creates a new diagnostic agent without
// authentication. It is equivalent to NewLinuxDiagnosticAgentWithAuth(nil).
func NewLinuxDiagnosticAgent() *LinuxDiagnosticAgent {
	return NewLinuxDiagnosticAgentWithAuth(nil)
}

// NewLinuxDiagnosticAgentWithAuth creates a new diagnostic agent with an
// optional authentication manager. authManager may be nil; when non-nil it is
// used to attach a bearer token to TensorZero proxy requests.
func NewLinuxDiagnosticAgentWithAuth(authManager interface{}) *LinuxDiagnosticAgent {
	// SendRequestWithEpisode reads SUPABASE_PROJECT_URL itself; here we only
	// warn early so misconfiguration surfaces at startup rather than on the
	// first request.
	if os.Getenv("SUPABASE_PROJECT_URL") == "" {
		logging.Warning("SUPABASE_PROJECT_URL not set, TensorZero integration will not work")
	}

	model := os.Getenv("NANNYAPI_MODEL")
	if model == "" {
		model = "tensorzero::function_name::diagnose_and_heal"
		logging.Warning("Using default model '%s'. Set NANNYAPI_MODEL environment variable for your specific function", model)
	}

	// Note: we don't use the OpenAI client; requests go via direct HTTP to the
	// Supabase proxy.
	agent := &LinuxDiagnosticAgent{
		client:      nil,                                      // not used anymore
		model:       model,                                    //
		executor:    executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
		config:      DefaultAgentConfig(),                     // default concurrent execution config
		authManager: authManager,                              // auth manager for TensorZero requests (may be nil)
	}

	agent.ebpfManager = ebpf.NewBCCTraceManager()
	agent.logger = logging.NewLogger()
	return agent
}

// DiagnoseIssue starts the diagnostic process for a given issue. It loops:
// send the conversation to the model; if the model replies with a
// "diagnostic" response, execute the requested commands and eBPF traces and
// append the results as a new user message; if it replies with a
// "resolution" response (or anything unparseable), stop.
func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
	logging.Info("Diagnosing issue: %s", issue)
	logging.Info("Gathering system information...")

	// Seed the conversation with host system info plus the user's issue.
	systemInfo := system.GatherSystemInfo()
	initialPrompt := system.FormatSystemInfoForPrompt(systemInfo) + "\n" + issue

	messages := []openai.ChatCompletionMessage{
		{
			Role:    openai.ChatMessageRoleUser,
			Content: initialPrompt,
		},
	}

	for {
		// Send request to TensorZero, carrying the episode ID so the server
		// keeps conversation continuity.
		response, err := a.SendRequestWithEpisode(messages, a.episodeID)
		if err != nil {
			return fmt.Errorf("failed to send request: %w", err)
		}
		if len(response.Choices) == 0 {
			return fmt.Errorf("no choices in response")
		}

		content := response.Choices[0].Message.Content
		logging.Debug("AI Response: %s", content)

		var diagnosticResp types.EBPFEnhancedDiagnosticResponse
		var resolutionResp types.ResolutionResponse

		// Try to parse as a diagnostic response first (with eBPF support).
		logging.Debug("Attempting to parse response as diagnostic...")
		if err := json.Unmarshal([]byte(content), &diagnosticResp); err == nil && diagnosticResp.ResponseType == "diagnostic" {
			logging.Debug("Successfully parsed as diagnostic response with %d commands", len(diagnosticResp.Commands))
			logging.Debug("Reasoning: %s", diagnosticResp.Reasoning)

			// Execute the requested shell commands and collect their results.
			commandResults := make([]types.CommandResult, 0, len(diagnosticResp.Commands))
			if len(diagnosticResp.Commands) > 0 {
				logging.Info("Executing %d diagnostic commands", len(diagnosticResp.Commands))
				for i, cmdStr := range diagnosticResp.Commands {
					// Commands arrive as bare strings; wrap them with an
					// auto-generated ID and description.
					cmd := types.Command{
						ID:          fmt.Sprintf("cmd_%d", i+1),
						Command:     cmdStr,
						Description: fmt.Sprintf("Diagnostic command: %s", cmdStr),
					}
					result := a.executor.Execute(cmd)
					commandResults = append(commandResults, result)
					if result.ExitCode != 0 {
						logging.Warning("Command '%s' failed with exit code %d", cmd.ID, result.ExitCode)
					}
				}
			}

			// Execute eBPF programs if present.
			var ebpfResults []map[string]interface{}
			if len(diagnosticResp.EBPFPrograms) > 0 {
				logging.Info("AI requested %d eBPF traces for enhanced diagnostics", len(diagnosticResp.EBPFPrograms))
				traceSpecs := a.ConvertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
				ebpfResults = a.ExecuteEBPFTraces(traceSpecs)
			}

			// Bundle all results into a single JSON user message.
			allResults := map[string]interface{}{
				"command_results":   commandResults,
				"executed_commands": len(commandResults),
			}
			if len(ebpfResults) > 0 {
				allResults["ebpf_results"] = ebpfResults
				allResults["executed_ebpf_programs"] = len(ebpfResults)

				// Extract a human-readable evidence summary for TensorZero.
				evidenceSummary := make([]string, 0)
				for _, result := range ebpfResults {
					target := result["target"]
					eventCount := result["event_count"]
					summary := result["summary"]
					success := result["success"]
					status := "failed"
					if success == true { // interface{} holding bool, set by ExecuteEBPFTraces
						status = "success"
					}
					summaryStr := fmt.Sprintf("%s: %v events (%s) - %s", target, eventCount, status, summary)
					evidenceSummary = append(evidenceSummary, summaryStr)
				}
				allResults["ebpf_evidence_summary"] = evidenceSummary
			}

			resultsJSON, err := json.MarshalIndent(allResults, "", " ")
			if err != nil {
				return fmt.Errorf("failed to marshal command results: %w", err)
			}

			// Append the AI turn and the execution results, then loop.
			messages = append(messages, openai.ChatCompletionMessage{
				Role:    openai.ChatMessageRoleAssistant,
				Content: content,
			})
			messages = append(messages, openai.ChatCompletionMessage{
				Role:    openai.ChatMessageRoleUser,
				Content: string(resultsJSON),
			})
			continue
		} else {
			logging.Debug("Failed to parse as diagnostic. Error: %v, ResponseType: '%s'", err, diagnosticResp.ResponseType)
		}

		// Try to parse as a resolution response; this ends the loop.
		if err := json.Unmarshal([]byte(content), &resolutionResp); err == nil && resolutionResp.ResponseType == "resolution" {
			logging.Info("=== DIAGNOSIS COMPLETE ===")
			logging.Info("Root Cause: %s", resolutionResp.RootCause)
			logging.Info("Resolution Plan: %s", resolutionResp.ResolutionPlan)
			logging.Info("Confidence: %s", resolutionResp.Confidence)
			break
		}

		// Unparseable response: log and stop rather than looping forever.
		logging.Error("Unexpected response format or error from AI: %s", content)
		break
	}

	return nil
}

// SendRequest sends a request to TensorZero via the Supabase proxy without an
// episode ID (a new episode will be started server-side).
func (a *LinuxDiagnosticAgent) SendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
	return a.SendRequestWithEpisode(messages, "")
}

// ExecuteCommand executes a command using the agent's executor.
func (a *LinuxDiagnosticAgent) ExecuteCommand(cmd types.Command) types.CommandResult {
	return a.executor.Execute(cmd)
}

// SendRequestWithEpisode sends a request to TensorZero via the Supabase proxy,
// passing episodeID (when non-empty) for conversation continuity. The proxy's
// JSON reply is converted into an OpenAI-compatible response, and the agent's
// episode ID is updated from the reply when present.
func (a *LinuxDiagnosticAgent) SendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
	// Convert messages to the plain map format the proxy expects.
	messageMaps := make([]map[string]interface{}, len(messages))
	for i, msg := range messages {
		messageMaps[i] = map[string]interface{}{
			"role":    msg.Role,
			"content": msg.Content,
		}
	}

	tzRequest := map[string]interface{}{
		"model":    a.model,
		"messages": messageMaps,
	}
	if episodeID != "" {
		tzRequest["tensorzero::episode_id"] = episodeID
	}

	requestBody, err := json.Marshal(tzRequest)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	supabaseURL := os.Getenv("SUPABASE_PROJECT_URL")
	if supabaseURL == "" {
		return nil, fmt.Errorf("SUPABASE_PROJECT_URL not set")
	}

	// The proxy exposes an OpenAI-compatible chat-completions path.
	endpoint := fmt.Sprintf("%s/functions/v1/tensorzero-proxy/openai/v1/chat/completions", supabaseURL)
	logging.Debug("Calling TensorZero proxy at: %s", endpoint)

	req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(requestBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Accept", "application/json")

	// Attach a bearer token if an auth manager is available (same pattern as
	// investigation_server.go). Token load failures fall through silently and
	// the request is sent unauthenticated.
	if a.authManager != nil {
		if authMgr, ok := a.authManager.(interface {
			LoadToken() (*types.AuthToken, error)
		}); ok {
			if authToken, err := authMgr.LoadToken(); err == nil && authToken != nil {
				req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", authToken.AccessToken))
			}
		}
	}

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("TensorZero proxy error: %d, body: %s", resp.StatusCode, string(body))
	}

	var tzResponse map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&tzResponse); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}

	// Pull the first choice's message content out of the loosely-typed reply.
	choices, ok := tzResponse["choices"].([]interface{})
	if !ok || len(choices) == 0 {
		return nil, fmt.Errorf("no choices in response")
	}
	firstChoice, ok := choices[0].(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("invalid choice format")
	}
	message, ok := firstChoice["message"].(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("invalid message format")
	}
	content, ok := message["content"].(string)
	if !ok {
		return nil, fmt.Errorf("invalid content format")
	}

	// Re-wrap as an OpenAI-compatible response for callers.
	response := &openai.ChatCompletionResponse{
		Choices: []openai.ChatCompletionChoice{
			{
				Message: openai.ChatCompletionMessage{
					Role:    openai.ChatMessageRoleAssistant,
					Content: content,
				},
			},
		},
	}

	// Remember the episode ID so subsequent turns stay in the same episode.
	if respEpisodeID, ok := tzResponse["episode_id"].(string); ok && respEpisodeID != "" {
		a.episodeID = respEpisodeID
	}

	return response, nil
}

// ConvertEBPFProgramsToTraceSpecs converts the old EBPFProgram request format
// to the new TraceSpec format used by the BCC trace manager.
func (a *LinuxDiagnosticAgent) ConvertEBPFProgramsToTraceSpecs(ebpfPrograms []types.EBPFRequest) []ebpf.TraceSpec {
	var traceSpecs []ebpf.TraceSpec
	for _, prog := range ebpfPrograms {
		traceSpecs = append(traceSpecs, a.convertToTraceSpec(prog))
	}
	return traceSpecs
}

// convertToTraceSpec converts an EBPFRequest to a TraceSpec for BCC-style
// tracing. Probe type is derived from the target's "tracepoint:"/"kprobe:"
// prefix or the request's Type field; bare syscall names are rewritten to
// their __x64_sys_ kprobe symbols.
func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) ebpf.TraceSpec {
	probeType := "p" // default to kprobe
	target := prog.Target

	if strings.HasPrefix(target, "tracepoint:") {
		probeType = "t"
		target = strings.TrimPrefix(target, "tracepoint:")
	} else if strings.HasPrefix(target, "kprobe:") {
		probeType = "p"
		target = strings.TrimPrefix(target, "kprobe:")
	} else if prog.Type == "tracepoint" {
		probeType = "t"
	} else if prog.Type == "syscall" {
		// Convert bare syscall names to kprobe-able kernel symbols.
		if !strings.HasPrefix(target, "__x64_sys_") && !strings.Contains(target, ":") {
			if strings.HasPrefix(target, "sys_") {
				target = "__x64_" + target
			} else {
				target = "__x64_sys_" + target
			}
		}
		probeType = "p"
	}

	duration := prog.Duration
	if duration <= 0 {
		duration = 5 // default 5 seconds
	}

	return ebpf.TraceSpec{
		ProbeType: probeType,
		Target:    target,
		Format:    prog.Description, // use description as format
		Arguments: []string{},       // start with no arguments for compatibility
		Duration:  duration,
		UID:       -1, // no UID filter (don't default to 0, which means root only)
	}
}

// ExecuteEBPFTraces executes multiple eBPF traces sequentially using the eBPF
// manager: each trace is started, allowed to run for its configured duration,
// then its results are collected. Per-trace failures are recorded as
// unsuccessful result entries rather than aborting the batch.
func (a *LinuxDiagnosticAgent) ExecuteEBPFTraces(traceSpecs []ebpf.TraceSpec) []map[string]interface{} {
	if len(traceSpecs) == 0 {
		return []map[string]interface{}{}
	}

	a.logger.Info("Executing %d eBPF traces", len(traceSpecs))
	results := make([]map[string]interface{}, 0, len(traceSpecs))

	for i, spec := range traceSpecs {
		a.logger.Debug("Starting trace %d: %s", i, spec.Target)

		traceID, err := a.ebpfManager.StartTrace(spec)
		if err != nil {
			a.logger.Error("Failed to start trace %d: %v", i, err)
			results = append(results, map[string]interface{}{
				"index":   i,
				"target":  spec.Target,
				"success": false,
				"error":   err.Error(),
			})
			continue
		}

		// Let the trace run for its requested duration before collecting.
		time.Sleep(time.Duration(spec.Duration) * time.Second)

		traceResult, err := a.ebpfManager.GetTraceResult(traceID)
		if err != nil {
			a.logger.Error("Failed to get results for trace %d: %v", i, err)
			results = append(results, map[string]interface{}{
				"index":   i,
				"target":  spec.Target,
				"success": false,
				"error":   err.Error(),
			})
			continue
		}

		results = append(results, map[string]interface{}{
			"index":             i,
			"target":            spec.Target,
			"success":           true,
			"event_count":       traceResult.EventCount,
			"events_per_second": traceResult.Statistics.EventsPerSecond,
			"duration":          traceResult.EndTime.Sub(traceResult.StartTime).Seconds(),
			"summary":           traceResult.Summary,
		})
		a.logger.Debug("Completed trace %d: %d events", i, traceResult.EventCount)
	}

	a.logger.Info("Completed %d eBPF traces", len(results))
	return results
}