Somewhat okay refactoring
This commit is contained in:
277
agent.go
277
agent.go
@@ -8,42 +8,18 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"nannyagentv2/internal/ebpf"
|
||||
"nannyagentv2/internal/executor"
|
||||
"nannyagentv2/internal/logging"
|
||||
"nannyagentv2/internal/system"
|
||||
"nannyagentv2/internal/types"
|
||||
|
||||
"github.com/sashabaranov/go-openai"
|
||||
)
|
||||
|
||||
// DiagnosticResponse represents the diagnostic phase response from AI
|
||||
type DiagnosticResponse struct {
|
||||
ResponseType string `json:"response_type"`
|
||||
Phase string `json:"phase"`
|
||||
Analysis string `json:"analysis"`
|
||||
Commands []string `json:"commands"`
|
||||
NextSteps []string `json:"next_steps"`
|
||||
Reasoning string `json:"reasoning"`
|
||||
ConfidenceLevel float64 `json:"confidence_level"`
|
||||
}
|
||||
|
||||
// ResolutionResponse represents the resolution phase response from AI
|
||||
type ResolutionResponse struct {
|
||||
ResponseType string `json:"response_type"`
|
||||
RootCause string `json:"root_cause"`
|
||||
ResolutionPlan string `json:"resolution_plan"`
|
||||
Confidence string `json:"confidence"`
|
||||
}
|
||||
|
||||
// Command represents a command to be executed
|
||||
type Command struct {
|
||||
ID string `json:"id"`
|
||||
Command string `json:"command"`
|
||||
Description string `json:"description"`
|
||||
}
|
||||
|
||||
// AgentConfig holds configuration for concurrent execution
|
||||
// AgentConfig holds configuration for concurrent execution (local to agent)
|
||||
type AgentConfig struct {
|
||||
MaxConcurrentTasks int `json:"max_concurrent_tasks"`
|
||||
CollectiveResults bool `json:"collective_results"`
|
||||
@@ -57,24 +33,19 @@ func DefaultAgentConfig() *AgentConfig {
|
||||
}
|
||||
}
|
||||
|
||||
// CommandResult represents the result of executing a command
|
||||
type CommandResult struct {
|
||||
ID string `json:"id"`
|
||||
Command string `json:"command"`
|
||||
Output string `json:"output"`
|
||||
ExitCode int `json:"exit_code"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
//
|
||||
// LinuxDiagnosticAgent represents the main diagnostic agent
|
||||
|
||||
// LinuxDiagnosticAgent represents the main agent
|
||||
// LinuxDiagnosticAgent represents the main diagnostic agent
|
||||
type LinuxDiagnosticAgent struct {
|
||||
client *openai.Client
|
||||
model string
|
||||
executor *CommandExecutor
|
||||
episodeID string // TensorZero episode ID for conversation continuity
|
||||
ebpfManager *BCCTraceManager // BCC-style eBPF tracing capabilities
|
||||
config *AgentConfig // Configuration for concurrent execution
|
||||
authManager interface{} // Authentication manager for TensorZero requests
|
||||
executor *executor.CommandExecutor
|
||||
episodeID string // TensorZero episode ID for conversation continuity
|
||||
ebpfManager *ebpf.BCCTraceManager // eBPF tracing manager
|
||||
config *AgentConfig // Configuration for concurrent execution
|
||||
authManager interface{} // Authentication manager for TensorZero requests
|
||||
logger *logging.Logger
|
||||
}
|
||||
|
||||
// NewLinuxDiagnosticAgent creates a new diagnostic agent
|
||||
@@ -96,12 +67,13 @@ func NewLinuxDiagnosticAgent() *LinuxDiagnosticAgent {
|
||||
agent := &LinuxDiagnosticAgent{
|
||||
client: nil, // Not used anymore
|
||||
model: model,
|
||||
executor: NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
|
||||
config: DefaultAgentConfig(), // Default concurrent execution config
|
||||
executor: executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
|
||||
config: DefaultAgentConfig(), // Default concurrent execution config
|
||||
}
|
||||
|
||||
// Initialize BCC-style eBPF capabilities
|
||||
agent.ebpfManager = NewBCCTraceManager()
|
||||
// Initialize eBPF manager
|
||||
agent.ebpfManager = ebpf.NewBCCTraceManager()
|
||||
agent.logger = logging.NewLogger()
|
||||
|
||||
return agent
|
||||
}
|
||||
@@ -125,13 +97,14 @@ func NewLinuxDiagnosticAgentWithAuth(authManager interface{}) *LinuxDiagnosticAg
|
||||
agent := &LinuxDiagnosticAgent{
|
||||
client: nil, // Not used anymore
|
||||
model: model,
|
||||
executor: NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
|
||||
config: DefaultAgentConfig(), // Default concurrent execution config
|
||||
authManager: authManager, // Store auth manager for TensorZero requests
|
||||
executor: executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
|
||||
config: DefaultAgentConfig(), // Default concurrent execution config
|
||||
authManager: authManager, // Store auth manager for TensorZero requests
|
||||
}
|
||||
|
||||
// Initialize BCC-style eBPF capabilities
|
||||
agent.ebpfManager = NewBCCTraceManager()
|
||||
// Initialize eBPF manager
|
||||
agent.ebpfManager = ebpf.NewBCCTraceManager()
|
||||
agent.logger = logging.NewLogger()
|
||||
|
||||
return agent
|
||||
}
|
||||
@@ -142,10 +115,10 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
logging.Info("Gathering system information...")
|
||||
|
||||
// Gather system information
|
||||
systemInfo := GatherSystemInfo()
|
||||
systemInfo := system.GatherSystemInfo()
|
||||
|
||||
// Format the initial prompt with system information
|
||||
initialPrompt := FormatSystemInfoForPrompt(systemInfo) + "\n" + issue
|
||||
initialPrompt := system.FormatSystemInfoForPrompt(systemInfo) + "\n" + issue
|
||||
|
||||
// Start conversation with initial issue including system info
|
||||
messages := []openai.ChatCompletionMessage{
|
||||
@@ -157,7 +130,7 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
|
||||
for {
|
||||
// Send request to TensorZero API via OpenAI SDK
|
||||
response, err := a.sendRequestWithEpisode(messages, a.episodeID)
|
||||
response, err := a.SendRequestWithEpisode(messages, a.episodeID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
@@ -171,7 +144,7 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
|
||||
// Parse the response to determine next action
|
||||
var diagnosticResp types.EBPFEnhancedDiagnosticResponse
|
||||
var resolutionResp ResolutionResponse
|
||||
var resolutionResp types.ResolutionResponse
|
||||
|
||||
// Try to parse as diagnostic response first (with eBPF support)
|
||||
logging.Debug("Attempting to parse response as diagnostic...")
|
||||
@@ -181,12 +154,12 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
logging.Debug("Reasoning: %s", diagnosticResp.Reasoning)
|
||||
|
||||
// Execute commands and collect results
|
||||
commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
|
||||
commandResults := make([]types.CommandResult, 0, len(diagnosticResp.Commands))
|
||||
if len(diagnosticResp.Commands) > 0 {
|
||||
logging.Info("Executing %d diagnostic commands", len(diagnosticResp.Commands))
|
||||
for i, cmdStr := range diagnosticResp.Commands {
|
||||
// Convert string command to Command struct (auto-generate ID and description)
|
||||
cmd := Command{
|
||||
cmd := types.Command{
|
||||
ID: fmt.Sprintf("cmd_%d", i+1),
|
||||
Command: cmdStr,
|
||||
Description: fmt.Sprintf("Diagnostic command: %s", cmdStr),
|
||||
@@ -205,9 +178,9 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
if len(diagnosticResp.EBPFPrograms) > 0 {
|
||||
logging.Info("AI requested %d eBPF traces for enhanced diagnostics", len(diagnosticResp.EBPFPrograms))
|
||||
|
||||
// Convert EBPFPrograms to TraceSpecs and execute concurrently
|
||||
traceSpecs := a.convertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
|
||||
ebpfResults = a.executeBCCTracesConcurrently(traceSpecs)
|
||||
// Convert EBPFPrograms to TraceSpecs and execute concurrently using the eBPF service
|
||||
traceSpecs := a.ConvertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
|
||||
ebpfResults = a.ExecuteEBPFTraces(traceSpecs)
|
||||
}
|
||||
|
||||
// Prepare combined results as user message
|
||||
@@ -279,12 +252,17 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
}
|
||||
|
||||
// SendRequest sends a request to TensorZero via Supabase proxy (without episode ID)
|
||||
func (a *LinuxDiagnosticAgent) sendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
|
||||
return a.sendRequestWithEpisode(messages, "")
|
||||
func (a *LinuxDiagnosticAgent) SendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
|
||||
return a.SendRequestWithEpisode(messages, "")
|
||||
}
|
||||
|
||||
// ExecuteCommand executes a command using the agent's executor
|
||||
func (a *LinuxDiagnosticAgent) ExecuteCommand(cmd types.Command) types.CommandResult {
|
||||
return a.executor.Execute(cmd)
|
||||
}
|
||||
|
||||
// SendRequestWithEpisode sends a request to TensorZero via Supabase proxy with episode ID for conversation continuity
|
||||
func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
|
||||
func (a *LinuxDiagnosticAgent) SendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
|
||||
// Convert messages to the expected format
|
||||
messageMaps := make([]map[string]interface{}, len(messages))
|
||||
for i, msg := range messages {
|
||||
@@ -403,9 +381,9 @@ func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatComp
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// convertEBPFProgramsToTraceSpecs converts old EBPFProgram format to new TraceSpec format
|
||||
func (a *LinuxDiagnosticAgent) convertEBPFProgramsToTraceSpecs(ebpfPrograms []types.EBPFRequest) []TraceSpec {
|
||||
var traceSpecs []TraceSpec
|
||||
// ConvertEBPFProgramsToTraceSpecs converts old EBPFProgram format to new TraceSpec format
|
||||
func (a *LinuxDiagnosticAgent) ConvertEBPFProgramsToTraceSpecs(ebpfPrograms []types.EBPFRequest) []ebpf.TraceSpec {
|
||||
var traceSpecs []ebpf.TraceSpec
|
||||
|
||||
for _, prog := range ebpfPrograms {
|
||||
spec := a.convertToTraceSpec(prog)
|
||||
@@ -416,7 +394,7 @@ func (a *LinuxDiagnosticAgent) convertEBPFProgramsToTraceSpecs(ebpfPrograms []ty
|
||||
}
|
||||
|
||||
// convertToTraceSpec converts an EBPFRequest to a TraceSpec for BCC-style tracing
|
||||
func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceSpec {
|
||||
func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) ebpf.TraceSpec {
|
||||
// Determine probe type based on target and type
|
||||
probeType := "p" // default to kprobe
|
||||
target := prog.Target
|
||||
@@ -447,7 +425,7 @@ func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceS
|
||||
duration = 5 // default 5 seconds
|
||||
}
|
||||
|
||||
return TraceSpec{
|
||||
return ebpf.TraceSpec{
|
||||
ProbeType: probeType,
|
||||
Target: target,
|
||||
Format: prog.Description, // Use description as format
|
||||
@@ -457,135 +435,66 @@ func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceS
|
||||
}
|
||||
}
|
||||
|
||||
// executeBCCTracesConcurrently executes multiple BCC traces concurrently with configurable parallelism
|
||||
func (a *LinuxDiagnosticAgent) executeBCCTracesConcurrently(traceSpecs []TraceSpec) []map[string]interface{} {
|
||||
// ExecuteEBPFTraces executes multiple eBPF traces using the eBPF service
|
||||
func (a *LinuxDiagnosticAgent) ExecuteEBPFTraces(traceSpecs []ebpf.TraceSpec) []map[string]interface{} {
|
||||
if len(traceSpecs) == 0 {
|
||||
return []map[string]interface{}{}
|
||||
}
|
||||
|
||||
logging.Info("Executing %d BCC traces with max %d concurrent tasks", len(traceSpecs), a.config.MaxConcurrentTasks)
|
||||
a.logger.Info("Executing %d eBPF traces", len(traceSpecs))
|
||||
|
||||
// Channel to limit concurrent goroutines
|
||||
semaphore := make(chan struct{}, a.config.MaxConcurrentTasks)
|
||||
resultsChan := make(chan map[string]interface{}, len(traceSpecs))
|
||||
var wg sync.WaitGroup
|
||||
results := make([]map[string]interface{}, 0, len(traceSpecs))
|
||||
|
||||
// Start all traces concurrently
|
||||
// Execute each trace using the eBPF manager
|
||||
for i, spec := range traceSpecs {
|
||||
wg.Add(1)
|
||||
go func(index int, traceSpec TraceSpec) {
|
||||
defer wg.Done()
|
||||
a.logger.Debug("Starting trace %d: %s", i, spec.Target)
|
||||
|
||||
// Acquire semaphore
|
||||
semaphore <- struct{}{}
|
||||
defer func() { <-semaphore }()
|
||||
|
||||
result := a.executeSingleBCCTrace(index, traceSpec)
|
||||
resultsChan <- result
|
||||
}(i, spec)
|
||||
}
|
||||
|
||||
// Wait for all traces to complete
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(resultsChan)
|
||||
}()
|
||||
|
||||
// Collect all results
|
||||
var allResults []map[string]interface{}
|
||||
for result := range resultsChan {
|
||||
allResults = append(allResults, result)
|
||||
}
|
||||
|
||||
if a.config.CollectiveResults {
|
||||
logging.Debug("All %d BCC traces completed. Sending collective results to API layer", len(allResults))
|
||||
}
|
||||
|
||||
return allResults
|
||||
}
|
||||
|
||||
// executeSingleBCCTrace executes a single BCC trace and returns the result
|
||||
func (a *LinuxDiagnosticAgent) executeSingleBCCTrace(index int, spec TraceSpec) map[string]interface{} {
|
||||
result := map[string]interface{}{
|
||||
"index": index,
|
||||
"target": spec.Target,
|
||||
"probe_type": spec.ProbeType,
|
||||
"success": false,
|
||||
"error": "",
|
||||
"start_time": time.Now().Format(time.RFC3339),
|
||||
}
|
||||
|
||||
logging.Debug("[Task %d] Starting BCC trace: %s (type: %s)", index, spec.Target, spec.ProbeType)
|
||||
|
||||
// Start the trace
|
||||
traceID, err := a.ebpfManager.StartTrace(spec)
|
||||
if err != nil {
|
||||
result["error"] = fmt.Sprintf("Failed to start trace: %v", err)
|
||||
logging.Error("[Task %d] Failed to start trace %s: %v", index, spec.Target, err)
|
||||
return result
|
||||
}
|
||||
|
||||
result["trace_id"] = traceID
|
||||
logging.Debug("[Task %d] Trace %s started with ID: %s", index, spec.Target, traceID)
|
||||
|
||||
// Wait for the trace duration
|
||||
time.Sleep(time.Duration(spec.Duration) * time.Second)
|
||||
|
||||
// Get the trace result
|
||||
traceResult, err := a.ebpfManager.GetTraceResult(traceID)
|
||||
if err != nil {
|
||||
// Try to stop the trace if it's still running
|
||||
a.ebpfManager.StopTrace(traceID)
|
||||
result["error"] = fmt.Sprintf("Failed to get trace results: %v", err)
|
||||
logging.Error("[Task %d] Failed to get results for trace %s: %v", index, spec.Target, err)
|
||||
return result
|
||||
}
|
||||
|
||||
// Populate result with trace data
|
||||
result["success"] = true
|
||||
result["end_time"] = time.Now().Format(time.RFC3339)
|
||||
result["event_count"] = traceResult.EventCount
|
||||
result["events_per_second"] = traceResult.Statistics.EventsPerSecond
|
||||
result["duration"] = traceResult.EndTime.Sub(traceResult.StartTime).Seconds()
|
||||
result["summary"] = traceResult.Summary
|
||||
|
||||
// Include sample events (limit to avoid large payloads)
|
||||
maxSampleEvents := 10
|
||||
if len(traceResult.Events) > 0 {
|
||||
sampleCount := len(traceResult.Events)
|
||||
if sampleCount > maxSampleEvents {
|
||||
sampleCount = maxSampleEvents
|
||||
}
|
||||
|
||||
sampleEvents := make([]map[string]interface{}, sampleCount)
|
||||
for i := 0; i < sampleCount; i++ {
|
||||
event := traceResult.Events[i]
|
||||
sampleEvents[i] = map[string]interface{}{
|
||||
"pid": event.PID,
|
||||
"tid": event.TID,
|
||||
"process_name": event.ProcessName,
|
||||
"message": event.Message,
|
||||
"timestamp": event.Timestamp,
|
||||
// Start the trace
|
||||
traceID, err := a.ebpfManager.StartTrace(spec)
|
||||
if err != nil {
|
||||
a.logger.Error("Failed to start trace %d: %v", i, err)
|
||||
result := map[string]interface{}{
|
||||
"index": i,
|
||||
"target": spec.Target,
|
||||
"success": false,
|
||||
"error": err.Error(),
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
result["sample_events"] = sampleEvents
|
||||
}
|
||||
|
||||
// Include top processes
|
||||
if len(traceResult.Statistics.TopProcesses) > 0 {
|
||||
topProcesses := make([]map[string]interface{}, len(traceResult.Statistics.TopProcesses))
|
||||
for i, proc := range traceResult.Statistics.TopProcesses {
|
||||
topProcesses[i] = map[string]interface{}{
|
||||
"process_name": proc.ProcessName,
|
||||
"event_count": proc.EventCount,
|
||||
"percentage": proc.Percentage,
|
||||
// Wait for the trace duration
|
||||
time.Sleep(time.Duration(spec.Duration) * time.Second)
|
||||
|
||||
// Get the trace result
|
||||
traceResult, err := a.ebpfManager.GetTraceResult(traceID)
|
||||
if err != nil {
|
||||
a.logger.Error("Failed to get results for trace %d: %v", i, err)
|
||||
result := map[string]interface{}{
|
||||
"index": i,
|
||||
"target": spec.Target,
|
||||
"success": false,
|
||||
"error": err.Error(),
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
result["top_processes"] = topProcesses
|
||||
|
||||
// Build successful result
|
||||
result := map[string]interface{}{
|
||||
"index": i,
|
||||
"target": spec.Target,
|
||||
"success": true,
|
||||
"event_count": traceResult.EventCount,
|
||||
"events_per_second": traceResult.Statistics.EventsPerSecond,
|
||||
"duration": traceResult.EndTime.Sub(traceResult.StartTime).Seconds(),
|
||||
"summary": traceResult.Summary,
|
||||
}
|
||||
results = append(results, result)
|
||||
|
||||
a.logger.Debug("Completed trace %d: %d events", i, traceResult.EventCount)
|
||||
}
|
||||
|
||||
logging.Debug("[Task %d] Trace %s completed: %d events (%.2f events/sec)",
|
||||
index, spec.Target, traceResult.EventCount, traceResult.Statistics.EventsPerSecond)
|
||||
|
||||
return result
|
||||
a.logger.Info("Completed %d eBPF traces", len(results))
|
||||
return results
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user