1) add-bpf-capability 2) Not so clean but for now it's okay to start with Co-authored-by: Harshavardhan Musanalli <harshavmb@gmail.com> Reviewed-on: #1
342 lines
11 KiB
Go
342 lines
11 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"time"
|
|
|
|
"github.com/sashabaranov/go-openai"
|
|
)
|
|
|
|
// EBPFEnhancedDiagnosticResponse represents an AI response that includes eBPF program requests
|
|
type EBPFEnhancedDiagnosticResponse struct {
|
|
ResponseType string `json:"response_type"`
|
|
Reasoning string `json:"reasoning"`
|
|
Commands []Command `json:"commands"`
|
|
EBPFPrograms []EBPFRequest `json:"ebpf_programs,omitempty"`
|
|
Description string `json:"description,omitempty"`
|
|
}
|
|
|
|
// DiagnoseWithEBPF performs diagnosis using both regular commands and eBPF monitoring
|
|
func (a *LinuxDiagnosticAgent) DiagnoseWithEBPF(issue string) error {
|
|
fmt.Printf("Diagnosing issue with eBPF monitoring: %s\n", issue)
|
|
fmt.Println("Gathering system information and eBPF capabilities...")
|
|
|
|
// Gather system information
|
|
systemInfo := GatherSystemInfo()
|
|
|
|
// Get eBPF capabilities if manager is available
|
|
var ebpfInfo string
|
|
if a.ebpfManager != nil {
|
|
capabilities := a.ebpfManager.GetCapabilities()
|
|
summary := a.ebpfManager.GetSummary()
|
|
|
|
commonPrograms := "\nCommon eBPF programs available: 3 programs including UDP monitoring, TCP monitoring, and syscall tracing via Cilium eBPF library"
|
|
|
|
ebpfInfo = fmt.Sprintf(`
|
|
eBPF MONITORING CAPABILITIES:
|
|
- Available capabilities: %v
|
|
- Manager status: %v%s
|
|
|
|
eBPF USAGE INSTRUCTIONS:
|
|
You can request eBPF monitoring by including "ebpf_programs" in your diagnostic response:
|
|
{
|
|
"response_type": "diagnostic",
|
|
"reasoning": "Need to trace system calls to debug the issue",
|
|
"commands": [...regular commands...],
|
|
"ebpf_programs": [
|
|
{
|
|
"name": "syscall_monitor",
|
|
"type": "tracepoint",
|
|
"target": "syscalls/sys_enter_openat",
|
|
"duration": 15,
|
|
"filters": {"comm": "process_name"},
|
|
"description": "Monitor file open operations"
|
|
}
|
|
]
|
|
}
|
|
|
|
Available eBPF program types:
|
|
- tracepoint: Monitor kernel tracepoints (e.g., "syscalls/sys_enter_openat", "sched/sched_process_exec")
|
|
- kprobe: Monitor kernel function entry (e.g., "tcp_connect", "vfs_read")
|
|
- kretprobe: Monitor kernel function return (e.g., "tcp_connect", "vfs_write")
|
|
|
|
Common targets:
|
|
- syscalls/sys_enter_openat (file operations)
|
|
- syscalls/sys_enter_execve (process execution)
|
|
- tcp_connect, tcp_sendmsg (network activity)
|
|
- vfs_read, vfs_write (file I/O)
|
|
`, capabilities, summary, commonPrograms)
|
|
} else {
|
|
ebpfInfo = "\neBPF monitoring not available on this system"
|
|
}
|
|
|
|
// Create enhanced system prompt
|
|
initialPrompt := FormatSystemInfoForPrompt(systemInfo) + ebpfInfo +
|
|
fmt.Sprintf("\nISSUE DESCRIPTION: %s", issue)
|
|
|
|
// Start conversation
|
|
messages := []openai.ChatCompletionMessage{
|
|
{
|
|
Role: openai.ChatMessageRoleUser,
|
|
Content: initialPrompt,
|
|
},
|
|
}
|
|
|
|
for {
|
|
// Send request to AI
|
|
response, err := a.sendRequest(messages)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to send request: %w", err)
|
|
}
|
|
|
|
if len(response.Choices) == 0 {
|
|
return fmt.Errorf("no choices in response")
|
|
}
|
|
|
|
content := response.Choices[0].Message.Content
|
|
fmt.Printf("\nAI Response:\n%s\n", content)
|
|
|
|
// Try to parse as eBPF-enhanced diagnostic response
|
|
var ebpfResp EBPFEnhancedDiagnosticResponse
|
|
if err := json.Unmarshal([]byte(content), &ebpfResp); err == nil && ebpfResp.ResponseType == "diagnostic" {
|
|
fmt.Printf("\nReasoning: %s\n", ebpfResp.Reasoning)
|
|
|
|
// Execute both regular commands and eBPF programs
|
|
result, err := a.executeWithEBPFPrograms(ebpfResp)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to execute with eBPF: %w", err)
|
|
}
|
|
|
|
// Add results to conversation
|
|
resultsJSON, err := json.MarshalIndent(result, "", " ")
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal results: %w", err)
|
|
}
|
|
|
|
messages = append(messages, openai.ChatCompletionMessage{
|
|
Role: openai.ChatMessageRoleAssistant,
|
|
Content: content,
|
|
})
|
|
messages = append(messages, openai.ChatCompletionMessage{
|
|
Role: openai.ChatMessageRoleUser,
|
|
Content: string(resultsJSON),
|
|
})
|
|
|
|
continue
|
|
}
|
|
|
|
// Try to parse as regular diagnostic response
|
|
var diagnosticResp DiagnosticResponse
|
|
if err := json.Unmarshal([]byte(content), &diagnosticResp); err == nil && diagnosticResp.ResponseType == "diagnostic" {
|
|
fmt.Printf("\nReasoning: %s\n", diagnosticResp.Reasoning)
|
|
|
|
if len(diagnosticResp.Commands) == 0 {
|
|
fmt.Println("No commands to execute")
|
|
break
|
|
}
|
|
|
|
// Execute regular commands only
|
|
commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
|
|
for _, cmd := range diagnosticResp.Commands {
|
|
fmt.Printf("\nExecuting command '%s': %s\n", cmd.ID, cmd.Command)
|
|
result := a.executor.Execute(cmd)
|
|
commandResults = append(commandResults, result)
|
|
|
|
fmt.Printf("Output:\n%s\n", result.Output)
|
|
if result.Error != "" {
|
|
fmt.Printf("Error: %s\n", result.Error)
|
|
}
|
|
}
|
|
|
|
// Add results to conversation
|
|
resultsJSON, err := json.MarshalIndent(commandResults, "", " ")
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal results: %w", err)
|
|
}
|
|
|
|
messages = append(messages, openai.ChatCompletionMessage{
|
|
Role: openai.ChatMessageRoleAssistant,
|
|
Content: content,
|
|
})
|
|
messages = append(messages, openai.ChatCompletionMessage{
|
|
Role: openai.ChatMessageRoleUser,
|
|
Content: string(resultsJSON),
|
|
})
|
|
|
|
continue
|
|
}
|
|
|
|
// Try to parse as resolution response
|
|
var resolutionResp ResolutionResponse
|
|
if err := json.Unmarshal([]byte(content), &resolutionResp); err == nil && resolutionResp.ResponseType == "resolution" {
|
|
fmt.Printf("\n=== DIAGNOSIS COMPLETE ===\n")
|
|
fmt.Printf("Root Cause: %s\n", resolutionResp.RootCause)
|
|
fmt.Printf("Resolution Plan: %s\n", resolutionResp.ResolutionPlan)
|
|
fmt.Printf("Confidence: %s\n", resolutionResp.Confidence)
|
|
|
|
// Show any active eBPF programs
|
|
if a.ebpfManager != nil {
|
|
activePrograms := a.ebpfManager.ListActivePrograms()
|
|
if len(activePrograms) > 0 {
|
|
fmt.Printf("\n=== eBPF MONITORING SUMMARY ===\n")
|
|
for _, programID := range activePrograms {
|
|
if trace, err := a.ebpfManager.GetProgramResults(programID); err == nil {
|
|
fmt.Printf("Program %s: %s\n", programID, trace.Summary)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
break
|
|
}
|
|
|
|
// Unknown response format
|
|
fmt.Printf("Unexpected response format:\n%s\n", content)
|
|
break
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// executeWithEBPFPrograms executes regular commands alongside eBPF programs
|
|
func (a *LinuxDiagnosticAgent) executeWithEBPFPrograms(resp EBPFEnhancedDiagnosticResponse) (map[string]interface{}, error) {
|
|
result := map[string]interface{}{
|
|
"command_results": make([]CommandResult, 0),
|
|
"ebpf_results": make(map[string]*EBPFTrace),
|
|
}
|
|
|
|
var ebpfProgramIDs []string
|
|
|
|
// Debug: Check if eBPF programs were requested
|
|
fmt.Printf("DEBUG: AI requested %d eBPF programs\n", len(resp.EBPFPrograms))
|
|
if a.ebpfManager == nil {
|
|
fmt.Printf("DEBUG: eBPF manager is nil\n")
|
|
} else {
|
|
fmt.Printf("DEBUG: eBPF manager available, capabilities: %v\n", a.ebpfManager.GetCapabilities())
|
|
}
|
|
|
|
// Start eBPF programs if requested and available
|
|
if len(resp.EBPFPrograms) > 0 && a.ebpfManager != nil {
|
|
fmt.Printf("Starting %d eBPF monitoring programs...\n", len(resp.EBPFPrograms))
|
|
|
|
for _, program := range resp.EBPFPrograms {
|
|
programID, err := a.ebpfManager.StartEBPFProgram(program)
|
|
if err != nil {
|
|
log.Printf("Failed to start eBPF program %s: %v", program.Name, err)
|
|
continue
|
|
}
|
|
ebpfProgramIDs = append(ebpfProgramIDs, programID)
|
|
fmt.Printf("Started eBPF program: %s (%s on %s)\n", programID, program.Type, program.Target)
|
|
}
|
|
|
|
// Give eBPF programs time to start
|
|
time.Sleep(200 * time.Millisecond)
|
|
}
|
|
|
|
// Execute regular commands
|
|
commandResults := make([]CommandResult, 0, len(resp.Commands))
|
|
for _, cmd := range resp.Commands {
|
|
fmt.Printf("\nExecuting command '%s': %s\n", cmd.ID, cmd.Command)
|
|
cmdResult := a.executor.Execute(cmd)
|
|
commandResults = append(commandResults, cmdResult)
|
|
|
|
fmt.Printf("Output:\n%s\n", cmdResult.Output)
|
|
if cmdResult.Error != "" {
|
|
fmt.Printf("Error: %s\n", cmdResult.Error)
|
|
}
|
|
}
|
|
|
|
result["command_results"] = commandResults
|
|
|
|
// If no eBPF programs were requested but we have eBPF capability and this seems network-related,
|
|
// automatically start UDP monitoring
|
|
if len(ebpfProgramIDs) == 0 && a.ebpfManager != nil && len(resp.EBPFPrograms) == 0 {
|
|
fmt.Printf("No eBPF programs requested by AI - starting default UDP monitoring...\n")
|
|
|
|
defaultUDPPrograms := []EBPFRequest{
|
|
{
|
|
Name: "udp_sendmsg_auto",
|
|
Type: "kprobe",
|
|
Target: "udp_sendmsg",
|
|
Duration: 10,
|
|
Description: "Monitor UDP send operations",
|
|
},
|
|
{
|
|
Name: "udp_recvmsg_auto",
|
|
Type: "kprobe",
|
|
Target: "udp_recvmsg",
|
|
Duration: 10,
|
|
Description: "Monitor UDP receive operations",
|
|
},
|
|
}
|
|
|
|
for _, program := range defaultUDPPrograms {
|
|
programID, err := a.ebpfManager.StartEBPFProgram(program)
|
|
if err != nil {
|
|
log.Printf("Failed to start default eBPF program %s: %v", program.Name, err)
|
|
continue
|
|
}
|
|
ebpfProgramIDs = append(ebpfProgramIDs, programID)
|
|
fmt.Printf("Started default eBPF program: %s (%s on %s)\n", programID, program.Type, program.Target)
|
|
}
|
|
}
|
|
|
|
// Wait for eBPF programs to complete and collect results
|
|
if len(ebpfProgramIDs) > 0 {
|
|
fmt.Printf("Waiting for %d eBPF programs to complete...\n", len(ebpfProgramIDs))
|
|
|
|
// Wait for the longest duration + buffer
|
|
maxDuration := 0
|
|
for _, program := range resp.EBPFPrograms {
|
|
if program.Duration > maxDuration {
|
|
maxDuration = program.Duration
|
|
}
|
|
}
|
|
|
|
waitTime := time.Duration(maxDuration+2) * time.Second
|
|
if waitTime < 5*time.Second {
|
|
waitTime = 5 * time.Second
|
|
}
|
|
|
|
time.Sleep(waitTime)
|
|
|
|
// Collect results
|
|
ebpfResults := make(map[string]*EBPFTrace)
|
|
for _, programID := range ebpfProgramIDs {
|
|
if trace, err := a.ebpfManager.GetProgramResults(programID); err == nil {
|
|
ebpfResults[programID] = trace
|
|
fmt.Printf("Collected eBPF results from %s: %d events\n", programID, trace.EventCount)
|
|
} else {
|
|
log.Printf("Failed to get results from eBPF program %s: %v", programID, err)
|
|
}
|
|
}
|
|
|
|
result["ebpf_results"] = ebpfResults
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// GetEBPFCapabilitiesPrompt returns eBPF capabilities formatted for AI prompts
|
|
func (a *LinuxDiagnosticAgent) GetEBPFCapabilitiesPrompt() string {
|
|
if a.ebpfManager == nil {
|
|
return "eBPF monitoring not available"
|
|
}
|
|
|
|
capabilities := a.ebpfManager.GetCapabilities()
|
|
summary := a.ebpfManager.GetSummary()
|
|
|
|
return fmt.Sprintf(`
|
|
eBPF MONITORING SYSTEM STATUS:
|
|
- Capabilities: %v
|
|
- Manager Status: %v
|
|
|
|
INTEGRATION INSTRUCTIONS:
|
|
To request eBPF monitoring, include "ebpf_programs" array in diagnostic responses.
|
|
Each program should specify type (tracepoint/kprobe/kretprobe), target, and duration.
|
|
eBPF programs will run in parallel with regular diagnostic commands.
|
|
`, capabilities, summary)
|
|
}
|