package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/sashabaranov/go-openai"
)

// DiagnosticResponse is the JSON shape of a diagnostic-phase reply from the AI:
// a reasoning string plus the commands it wants executed.
type DiagnosticResponse struct {
	ResponseType string    `json:"response_type"`
	Reasoning    string    `json:"reasoning"`
	Commands     []Command `json:"commands"`
}

// ResolutionResponse is the JSON shape of a resolution-phase reply from the AI.
// DiagnoseIssue stops its conversation loop when it receives one of these.
type ResolutionResponse struct {
	ResponseType   string `json:"response_type"`
	RootCause      string `json:"root_cause"`
	ResolutionPlan string `json:"resolution_plan"`
	Confidence     string `json:"confidence"`
}

// Command describes a single command the AI asks the agent to execute.
type Command struct {
	ID          string `json:"id"`
	Command     string `json:"command"`
	Description string `json:"description"`
}

// CommandResult carries the outcome of executing a Command.
type CommandResult struct {
	ID       string `json:"id"`
	Command  string `json:"command"`
	Output   string `json:"output"`
	ExitCode int    `json:"exit_code"`
	Error    string `json:"error,omitempty"`
}

// LinuxDiagnosticAgent drives the diagnose/resolve conversation with the
// TensorZero function and runs the commands and eBPF programs it requests.
type LinuxDiagnosticAgent struct {
	client      *openai.Client       // always nil: requests go over plain HTTP, the field is kept for compatibility
	model       string               // model / TensorZero function name sent with every request
	executor    *CommandExecutor     // runs the diagnostic commands
	episodeID   string               // TensorZero episode ID for conversation continuity
	ebpfManager EBPFManagerInterface // eBPF monitoring capabilities
}

// NewLinuxDiagnosticAgent creates a new diagnostic agent.
//
// The model name is taken from NANNYAPI_MODEL, falling back to the
// "diagnose_and_heal" TensorZero function. A warning is printed when
// SUPABASE_PROJECT_URL is unset; the URL itself is resolved per request in
// sendRequestWithEpisode, so nothing is stored here.
func NewLinuxDiagnosticAgent() *LinuxDiagnosticAgent {
	if os.Getenv("SUPABASE_PROJECT_URL") == "" {
		fmt.Printf("Warning: SUPABASE_PROJECT_URL not set, TensorZero integration will not work\n")
	}

	model := os.Getenv("NANNYAPI_MODEL")
	if model == "" {
		model = "tensorzero::function_name::diagnose_and_heal"
		fmt.Printf("Warning: Using default model '%s'. Set NANNYAPI_MODEL environment variable for your specific function.\n", model)
	}

	// Note: the OpenAI client is not used anymore; requests go directly over
	// HTTP to the Supabase proxy.
	agent := &LinuxDiagnosticAgent{
		client:   nil,
		model:    model,
		executor: NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
	}

	// Initialize eBPF capabilities.
	agent.ebpfManager = NewCiliumEBPFManager()

	return agent
}

// DiagnoseIssue starts the diagnostic process for a given issue.
//
// It sends the gathered system information plus the issue text as the first
// user message, then loops: every "diagnostic" reply has its commands (and
// eBPF programs, if any) executed and the results fed back as the next user
// message. The loop ends on a "resolution" reply or on any reply that parses
// as neither. An error is returned only when a request fails, the reply has
// no choices, or the results cannot be marshalled.
//
// NOTE(review): the loop has no iteration cap; it runs for as long as the
// remote side keeps answering with diagnostic responses.
func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
	fmt.Printf("Diagnosing issue: %s\n", issue)
	fmt.Println("Gathering system information...")

	// Gather system information and prepend it to the issue text.
	systemInfo := GatherSystemInfo()
	initialPrompt := FormatSystemInfoForPrompt(systemInfo) + "\n" + issue

	// Start conversation with initial issue including system info.
	messages := []openai.ChatCompletionMessage{
		{
			Role:    openai.ChatMessageRoleUser,
			Content: initialPrompt,
		},
	}

	for {
		// Send the conversation so far to the TensorZero proxy over HTTP.
		response, err := a.sendRequestWithEpisode(messages, a.episodeID)
		if err != nil {
			return fmt.Errorf("failed to send request: %w", err)
		}
		if len(response.Choices) == 0 {
			return fmt.Errorf("no choices in response")
		}

		content := response.Choices[0].Message.Content
		fmt.Printf("\nAI Response:\n%s\n", content)

		// Parse the response to determine next action.
		var diagnosticResp EBPFEnhancedDiagnosticResponse
		var resolutionResp ResolutionResponse

		// Try to parse as diagnostic response first (with eBPF support).
		if err := json.Unmarshal([]byte(content), &diagnosticResp); err == nil && diagnosticResp.ResponseType == "diagnostic" {
			fmt.Printf("\nReasoning: %s\n", diagnosticResp.Reasoning)

			// Execute commands and collect results.
			commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
			if len(diagnosticResp.Commands) > 0 {
				fmt.Printf("🔧 Executing diagnostic commands...\n")
				for _, cmd := range diagnosticResp.Commands {
					fmt.Printf("⚙️ Executing command '%s': %s\n", cmd.ID, cmd.Command)
					result := a.executor.Execute(cmd)
					commandResults = append(commandResults, result)

					if result.ExitCode == 0 {
						fmt.Printf("✅ Command '%s' completed successfully\n", cmd.ID)
					} else {
						fmt.Printf("❌ Command '%s' failed with exit code %d\n", cmd.ID, result.ExitCode)
					}
				}
			}

			// Execute eBPF programs if present.
			var ebpfResults []map[string]interface{}
			if len(diagnosticResp.EBPFPrograms) > 0 {
				fmt.Printf("🔬 Executing %d eBPF programs...\n", len(diagnosticResp.EBPFPrograms))
				ebpfResults = a.executeEBPFPrograms(diagnosticResp.EBPFPrograms)
			}

			// Prepare combined results as user message.
			allResults := map[string]interface{}{
				"command_results":   commandResults,
				"executed_commands": len(commandResults),
			}

			// Include eBPF results if any were executed.
			if len(ebpfResults) > 0 {
				allResults["ebpf_results"] = ebpfResults
				allResults["executed_ebpf_programs"] = len(ebpfResults)

				// Build a one-line summary per eBPF program for TensorZero.
				evidenceSummary := make([]string, 0, len(ebpfResults))
				for _, result := range ebpfResults {
					name := result["name"]
					eventCount := result["data_points"]
					description := result["description"]
					status := result["status"]

					summaryStr := fmt.Sprintf("%s: %v events (%s) - %s", name, eventCount, status, description)
					evidenceSummary = append(evidenceSummary, summaryStr)
				}
				allResults["ebpf_evidence_summary"] = evidenceSummary

				fmt.Printf("📤 Sending eBPF monitoring data to TensorZero:\n")
				for _, summary := range evidenceSummary {
					fmt.Printf(" - %s\n", summary)
				}

				fmt.Printf("✅ Executed %d commands, %d eBPF programs\n", len(commandResults), len(ebpfResults))
			} else {
				fmt.Printf("✅ Executed %d commands\n", len(commandResults))
			}

			resultsJSON, err := json.MarshalIndent(allResults, "", " ")
			if err != nil {
				return fmt.Errorf("failed to marshal command results: %w", err)
			}

			// Add AI response and command results to conversation.
			messages = append(messages, openai.ChatCompletionMessage{
				Role:    openai.ChatMessageRoleAssistant,
				Content: content,
			})
			messages = append(messages, openai.ChatCompletionMessage{
				Role:    openai.ChatMessageRoleUser,
				Content: string(resultsJSON),
			})

			continue
		}

		// Try to parse as resolution response.
		if err := json.Unmarshal([]byte(content), &resolutionResp); err == nil && resolutionResp.ResponseType == "resolution" {
			fmt.Printf("\n=== DIAGNOSIS COMPLETE ===\n")
			fmt.Printf("Root Cause: %s\n", resolutionResp.RootCause)
			fmt.Printf("Resolution Plan: %s\n", resolutionResp.ResolutionPlan)
			fmt.Printf("Confidence: %s\n", resolutionResp.Confidence)
			break
		}

		// Neither shape matched: report the raw content and stop.
		fmt.Printf("Unexpected response format or error from AI:\n%s\n", content)
		break
	}

	return nil
}

// executeEBPFPrograms runs each requested eBPF program through the agent's
// eBPF manager and returns one result map per program.
//
// Every map carries "name", "type", "target", "duration", "description",
// "status" and "success". Failures add "error"; successful runs add the trace
// fields when a trace was returned. Programs run one after another, and each
// blocks for its requested duration plus a short grace period. A nil slice is
// returned when the manager is not initialized.
func (a *LinuxDiagnosticAgent) executeEBPFPrograms(ebpfPrograms []EBPFRequest) []map[string]interface{} {
	var results []map[string]interface{}

	if a.ebpfManager == nil {
		fmt.Printf("❌ eBPF manager not initialized\n")
		return results
	}

	for _, prog := range ebpfPrograms {
		// newResult builds the fields shared by every outcome of this program.
		newResult := func(status string, success bool) map[string]interface{} {
			return map[string]interface{}{
				"name":        prog.Name,
				"type":        prog.Type,
				"target":      prog.Target,
				"duration":    int(prog.Duration),
				"description": prog.Description,
				"status":      status,
				"success":     success,
			}
		}

		fmt.Printf("🔬 Starting eBPF program [%s]: %s -> %s (%ds)\n", prog.Name, prog.Type, prog.Target, int(prog.Duration))

		// Start the eBPF program using the manager.
		programID, err := a.ebpfManager.StartEBPFProgram(prog)
		if err != nil {
			fmt.Printf("❌ Failed to start eBPF program [%s]: %v\n", prog.Name, err)
			failed := newResult("failed", false)
			failed["error"] = err.Error()
			results = append(results, failed)
			continue
		}

		// Let the eBPF program run for the specified duration.
		fmt.Printf("⏰ Waiting %d seconds for eBPF program to collect data...\n", int(prog.Duration))
		time.Sleep(time.Duration(prog.Duration) * time.Second)

		// Grace period so the manager can finish storing results before we ask for them.
		fmt.Printf("⏳ Allowing program to complete data collection...\n")
		time.Sleep(500 * time.Millisecond)

		fmt.Printf("📊 Getting results for eBPF program [%s]...\n", prog.Name)

		// Fetch results in a goroutine so the call can be bounded by a timeout.
		// The channel is buffered so the goroutine can always deliver and exit,
		// even when the timeout branch has already been taken.
		type resultPair struct {
			trace *EBPFTrace
			err   error
		}
		resultChan := make(chan resultPair, 1)
		go func() {
			trace, err := a.ebpfManager.GetProgramResults(programID)
			resultChan <- resultPair{trace, err}
		}()

		var trace *EBPFTrace
		var resultErr error
		select {
		case result := <-resultChan:
			trace = result.trace
			resultErr = result.err
		case <-time.After(3 * time.Second):
			resultErr = fmt.Errorf("timeout getting results after 3 seconds")
		}

		// Try to stop the program; a stop error is only logged so that any
		// results already obtained are still processed.
		fmt.Printf("🛑 Stopping eBPF program [%s]...\n", prog.Name)
		if stopErr := a.ebpfManager.StopProgram(programID); stopErr != nil {
			fmt.Printf("⚠️ eBPF program [%s] cleanup: %v (may have already completed)\n", prog.Name, stopErr)
		}

		if resultErr != nil {
			fmt.Printf("❌ Failed to get results for eBPF program [%s]: %v\n", prog.Name, resultErr)
			failed := newResult("collection_failed", false)
			failed["error"] = resultErr.Error()
			results = append(results, failed)
			continue
		}

		// Copy the trace data into the result.
		result := newResult("completed", true)
		if trace != nil {
			result["trace_id"] = trace.TraceID
			result["data_points"] = trace.EventCount
			result["events"] = trace.Events
			result["summary"] = trace.Summary
			result["process_list"] = trace.ProcessList
			result["start_time"] = trace.StartTime.Format(time.RFC3339)
			result["end_time"] = trace.EndTime.Format(time.RFC3339)
			result["actual_duration"] = trace.EndTime.Sub(trace.StartTime).Seconds()

			fmt.Printf("✅ eBPF program [%s] completed - collected %d real events\n", prog.Name, trace.EventCount)
		} else {
			result["data_points"] = 0
			result["error"] = "No trace data returned"
			fmt.Printf("⚠️ eBPF program [%s] completed but returned no trace data\n", prog.Name)
		}

		results = append(results, result)
	}

	return results
}

// TensorZeroRequest is the chat-completion request body sent to the proxy.
// The episode ID is serialized under the "tensorzero::episode_id" key and is
// omitted when empty.
type TensorZeroRequest struct {
	Model     string                         `json:"model"`
	Messages  []openai.ChatCompletionMessage `json:"messages"`
	EpisodeID string                         `json:"tensorzero::episode_id,omitempty"`
}

// TensorZeroResponse is a chat-completion response extended with the
// "episode_id" field returned alongside it.
type TensorZeroResponse struct {
	openai.ChatCompletionResponse
	EpisodeID string `json:"episode_id"`
}

// sendRequest sends a request to the TensorZero API via the Supabase proxy
// without supplying an explicit episode ID.
func (a *LinuxDiagnosticAgent) sendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
	return a.sendRequestWithEpisode(messages, "")
}

// sendRequestWithEpisode posts messages to the chat-completions endpoint of
// the Supabase-hosted TensorZero proxy and returns the decoded response.
//
// The agent's stored episode ID takes precedence over the episodeID argument;
// the argument is used only while the agent has none. The first episode ID
// found in a response is stored on the agent for later requests. The request
// is authenticated with the bearer token from getAccessToken and is bounded
// by a 30 second timeout. Any non-200 status is returned as an error that
// includes the response body.
func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Create TensorZero-compatible request.
	tzRequest := TensorZeroRequest{
		Model:    a.model,
		Messages: messages,
	}

	// Include tensorzero::episode_id for conversation continuity: prefer the
	// agent's existing episode ID, otherwise use the provided one.
	if a.episodeID != "" {
		tzRequest.EpisodeID = a.episodeID
	} else if episodeID != "" {
		tzRequest.EpisodeID = episodeID
	}

	// Log the episode ID that is actually sent, not just the stored one.
	fmt.Printf("Debug: Sending request to model: %s", a.model)
	if tzRequest.EpisodeID != "" {
		fmt.Printf(" (episode: %s)", tzRequest.EpisodeID)
	}
	fmt.Println()

	requestBody, err := json.Marshal(tzRequest)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	// Get Supabase project URL and build the TensorZero proxy endpoint.
	supabaseURL := os.Getenv("SUPABASE_PROJECT_URL")
	if supabaseURL == "" {
		supabaseURL = "https://gpqzsricripnvbrpsyws.supabase.co"
	}
	// A configured URL may end with "/"; trim it so the path does not start with "//".
	supabaseURL = strings.TrimRight(supabaseURL, "/")

	// Build Supabase function URL with OpenAI v1 compatible path.
	endpoint := supabaseURL + "/functions/v1/tensorzero-proxy/openai/v1/chat/completions"

	req, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewBuffer(requestBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Add JWT authentication header.
	accessToken, err := a.getAccessToken()
	if err != nil {
		return nil, fmt.Errorf("failed to get access token: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+accessToken)

	// Make the request.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	// Read the body before checking the status so it can be included in the error.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("TensorZero API request failed with status %d: %s", resp.StatusCode, string(body))
	}

	// Parse TensorZero response.
	var tzResponse TensorZeroResponse
	if err := json.Unmarshal(body, &tzResponse); err != nil {
		return nil, fmt.Errorf("failed to unmarshal response: %w", err)
	}

	// Remember the episode ID from the first response that carries one.
	if a.episodeID == "" && tzResponse.EpisodeID != "" {
		a.episodeID = tzResponse.EpisodeID
		fmt.Printf("Debug: Extracted episode ID: %s\n", a.episodeID)
	}

	return &tzResponse.ChatCompletionResponse, nil
}

// getAccessToken reads the access token used for authentication.
//
// The token file path comes from TOKEN_PATH, defaulting to
// /var/lib/nannyagent/token.json. The file must be JSON with a non-empty
// "access_token" field; otherwise an error is returned.
func (a *LinuxDiagnosticAgent) getAccessToken() (string, error) {
	tokenPath := os.Getenv("TOKEN_PATH")
	if tokenPath == "" {
		tokenPath = "/var/lib/nannyagent/token.json"
	}

	tokenData, err := os.ReadFile(tokenPath)
	if err != nil {
		return "", fmt.Errorf("failed to read token file: %w", err)
	}

	var tokenInfo struct {
		AccessToken string `json:"access_token"`
	}
	if err := json.Unmarshal(tokenData, &tokenInfo); err != nil {
		return "", fmt.Errorf("failed to parse token file: %w", err)
	}

	if tokenInfo.AccessToken == "" {
		return "", fmt.Errorf("access token is empty")
	}

	return tokenInfo.AccessToken, nil
}