Somewhat okay refactoring: move agent helper types and services into internal packages

2025-11-08 21:48:59 +01:00
parent 794111cb44
commit c268a3a42e
12 changed files with 280 additions and 479 deletions
--- a/agent.go
+++ b/agent.go
@@ -8,42 +8,18 @@ import (
 	"net/http"
 	"os"
 	"strings"
-	"sync"
 	"time"

+	"nannyagentv2/internal/ebpf"
+	"nannyagentv2/internal/executor"
 	"nannyagentv2/internal/logging"
+	"nannyagentv2/internal/system"
 	"nannyagentv2/internal/types"

 	"github.com/sashabaranov/go-openai"
 )

-// DiagnosticResponse represents the diagnostic phase response from AI
-type DiagnosticResponse struct {
-	ResponseType    string   `json:"response_type"`
-	Phase           string   `json:"phase"`
-	Analysis        string   `json:"analysis"`
-	Commands        []string `json:"commands"`
-	NextSteps       []string `json:"next_steps"`
-	Reasoning       string   `json:"reasoning"`
-	ConfidenceLevel float64  `json:"confidence_level"`
-}
-
-// ResolutionResponse represents the resolution phase response from AI
-type ResolutionResponse struct {
-	ResponseType   string `json:"response_type"`
-	RootCause      string `json:"root_cause"`
-	ResolutionPlan string `json:"resolution_plan"`
-	Confidence     string `json:"confidence"`
-}
-
-// Command represents a command to be executed
-type Command struct {
-	ID          string `json:"id"`
-	Command     string `json:"command"`
-	Description string `json:"description"`
-}
-
-// AgentConfig holds configuration for concurrent execution
+// AgentConfig holds configuration for concurrent execution (local to agent)
 type AgentConfig struct {
 	MaxConcurrentTasks int  `json:"max_concurrent_tasks"`
 	CollectiveResults  bool `json:"collective_results"`
@@ -57,24 +33,19 @@ func DefaultAgentConfig() *AgentConfig {
 	}
 }

-// CommandResult represents the result of executing a command
-type CommandResult struct {
-	ID       string `json:"id"`
-	Command  string `json:"command"`
-	Output   string `json:"output"`
-	ExitCode int    `json:"exit_code"`
-	Error    string `json:"error,omitempty"`
-}
+// Shared command and response types (Command, CommandResult, ResolutionResponse)
+// are now referenced from nannyagentv2/internal/types.

-// LinuxDiagnosticAgent represents the main agent
+// LinuxDiagnosticAgent represents the main diagnostic agent
 type LinuxDiagnosticAgent struct {
 	client      *openai.Client
 	model       string
-	executor    *CommandExecutor
-	episodeID   string           // TensorZero episode ID for conversation continuity
-	ebpfManager *BCCTraceManager // BCC-style eBPF tracing capabilities
-	config      *AgentConfig     // Configuration for concurrent execution
-	authManager interface{}      // Authentication manager for TensorZero requests
+	executor    *executor.CommandExecutor
+	episodeID   string                // TensorZero episode ID for conversation continuity
+	ebpfManager *ebpf.BCCTraceManager // eBPF tracing manager
+	config      *AgentConfig          // Configuration for concurrent execution
+	authManager interface{}           // Authentication manager for TensorZero requests
+	logger      *logging.Logger
 }

 // NewLinuxDiagnosticAgent creates a new diagnostic agent
@@ -96,12 +67,13 @@ func NewLinuxDiagnosticAgent() *LinuxDiagnosticAgent {
 	agent := &LinuxDiagnosticAgent{
 		client:   nil, // Not used anymore
 		model:    model,
-		executor: NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
-		config:   DefaultAgentConfig(),                 // Default concurrent execution config
+		executor: executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
+		config:   DefaultAgentConfig(),                          // Default concurrent execution config
 	}

-	// Initialize BCC-style eBPF capabilities
-	agent.ebpfManager = NewBCCTraceManager()
+	// Initialize eBPF manager
+	agent.ebpfManager = ebpf.NewBCCTraceManager()
+	agent.logger = logging.NewLogger()

 	return agent
 }
@@ -125,13 +97,14 @@ func NewLinuxDiagnosticAgentWithAuth(authManager interface{}) *LinuxDiagnosticAg
 	agent := &LinuxDiagnosticAgent{
 		client:      nil, // Not used anymore
 		model:       model,
-		executor:    NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
-		config:      DefaultAgentConfig(),                 // Default concurrent execution config
-		authManager: authManager,                          // Store auth manager for TensorZero requests
+		executor:    executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
+		config:      DefaultAgentConfig(),                          // Default concurrent execution config
+		authManager: authManager,                                   // Store auth manager for TensorZero requests
 	}

-	// Initialize BCC-style eBPF capabilities
-	agent.ebpfManager = NewBCCTraceManager()
+	// Initialize eBPF manager
+	agent.ebpfManager = ebpf.NewBCCTraceManager()
+	agent.logger = logging.NewLogger()

 	return agent
 }
@@ -142,10 +115,10 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
 	logging.Info("Gathering system information...")

 	// Gather system information
-	systemInfo := GatherSystemInfo()
+	systemInfo := system.GatherSystemInfo()

 	// Format the initial prompt with system information
-	initialPrompt := FormatSystemInfoForPrompt(systemInfo) + "\n" + issue
+	initialPrompt := system.FormatSystemInfoForPrompt(systemInfo) + "\n" + issue

 	// Start conversation with initial issue including system info
 	messages := []openai.ChatCompletionMessage{
@@ -157,7 +130,7 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {

 	for {
 		// Send request to TensorZero API via OpenAI SDK
-		response, err := a.sendRequestWithEpisode(messages, a.episodeID)
+		response, err := a.SendRequestWithEpisode(messages, a.episodeID)
 		if err != nil {
 			return fmt.Errorf("failed to send request: %w", err)
 		}
@@ -171,7 +144,7 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {

 		// Parse the response to determine next action
 		var diagnosticResp types.EBPFEnhancedDiagnosticResponse
-		var resolutionResp ResolutionResponse
+		var resolutionResp types.ResolutionResponse

 		// Try to parse as diagnostic response first (with eBPF support)
 		logging.Debug("Attempting to parse response as diagnostic...")
@@ -181,12 +154,12 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
 			logging.Debug("Reasoning: %s", diagnosticResp.Reasoning)

 			// Execute commands and collect results
-			commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
+			commandResults := make([]types.CommandResult, 0, len(diagnosticResp.Commands))
 			if len(diagnosticResp.Commands) > 0 {
 				logging.Info("Executing %d diagnostic commands", len(diagnosticResp.Commands))
 				for i, cmdStr := range diagnosticResp.Commands {
 					// Convert string command to Command struct (auto-generate ID and description)
-					cmd := Command{
+					cmd := types.Command{
 						ID:          fmt.Sprintf("cmd_%d", i+1),
 						Command:     cmdStr,
 						Description: fmt.Sprintf("Diagnostic command: %s", cmdStr),
@@ -205,9 +178,9 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
 			if len(diagnosticResp.EBPFPrograms) > 0 {
 				logging.Info("AI requested %d eBPF traces for enhanced diagnostics", len(diagnosticResp.EBPFPrograms))

-				// Convert EBPFPrograms to TraceSpecs and execute concurrently
-				traceSpecs := a.convertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
-				ebpfResults = a.executeBCCTracesConcurrently(traceSpecs)
+				// Convert EBPFPrograms to TraceSpecs and execute concurrently using the eBPF service
+				traceSpecs := a.ConvertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
+				ebpfResults = a.ExecuteEBPFTraces(traceSpecs)
 			}

 			// Prepare combined results as user message
@@ -279,12 +252,17 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
 }

 // sendRequest sends a request to TensorZero via Supabase proxy (without episode ID)
-func (a *LinuxDiagnosticAgent) sendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
-	return a.sendRequestWithEpisode(messages, "")
+func (a *LinuxDiagnosticAgent) SendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
+	return a.SendRequestWithEpisode(messages, "")
+}
+
+// ExecuteCommand executes a command using the agent's executor
+func (a *LinuxDiagnosticAgent) ExecuteCommand(cmd types.Command) types.CommandResult {
+	return a.executor.Execute(cmd)
 }

 // sendRequestWithEpisode sends a request to TensorZero via Supabase proxy with episode ID for conversation continuity
-func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
+func (a *LinuxDiagnosticAgent) SendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
 	// Convert messages to the expected format
 	messageMaps := make([]map[string]interface{}, len(messages))
 	for i, msg := range messages {
@@ -403,9 +381,9 @@ func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatComp
 	return response, nil
 }

-// convertEBPFProgramsToTraceSpecs converts old EBPFProgram format to new TraceSpec format
-func (a *LinuxDiagnosticAgent) convertEBPFProgramsToTraceSpecs(ebpfPrograms []types.EBPFRequest) []TraceSpec {
-	var traceSpecs []TraceSpec
+// ConvertEBPFProgramsToTraceSpecs converts old EBPFProgram format to new TraceSpec format
+func (a *LinuxDiagnosticAgent) ConvertEBPFProgramsToTraceSpecs(ebpfPrograms []types.EBPFRequest) []ebpf.TraceSpec {
+	var traceSpecs []ebpf.TraceSpec

 	for _, prog := range ebpfPrograms {
 		spec := a.convertToTraceSpec(prog)
@@ -416,7 +394,7 @@ func (a *LinuxDiagnosticAgent) convertEBPFProgramsToTraceSpecs(ebpfPrograms []ty
 }

 // convertToTraceSpec converts an EBPFRequest to a TraceSpec for BCC-style tracing
-func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceSpec {
+func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) ebpf.TraceSpec {
 	// Determine probe type based on target and type
 	probeType := "p" // default to kprobe
 	target := prog.Target
@@ -447,7 +425,7 @@ func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceS
 		duration = 5 // default 5 seconds
 	}

-	return TraceSpec{
+	return ebpf.TraceSpec{
 		ProbeType: probeType,
 		Target:    target,
 		Format:    prog.Description, // Use description as format
@@ -457,135 +435,66 @@ func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceS
 	}
 }

-// executeBCCTracesConcurrently executes multiple BCC traces concurrently with configurable parallelism
-func (a *LinuxDiagnosticAgent) executeBCCTracesConcurrently(traceSpecs []TraceSpec) []map[string]interface{} {
+// ExecuteEBPFTraces runs each eBPF trace sequentially through the eBPF manager and returns one result map per trace
+func (a *LinuxDiagnosticAgent) ExecuteEBPFTraces(traceSpecs []ebpf.TraceSpec) []map[string]interface{} {
 	if len(traceSpecs) == 0 {
 		return []map[string]interface{}{}
 	}

-	logging.Info("Executing %d BCC traces with max %d concurrent tasks", len(traceSpecs), a.config.MaxConcurrentTasks)
+	a.logger.Info("Executing %d eBPF traces", len(traceSpecs))

-	// Channel to limit concurrent goroutines
-	semaphore := make(chan struct{}, a.config.MaxConcurrentTasks)
-	resultsChan := make(chan map[string]interface{}, len(traceSpecs))
-	var wg sync.WaitGroup
+	results := make([]map[string]interface{}, 0, len(traceSpecs))

-	// Start all traces concurrently
+	// Execute each trace using the eBPF manager
 	for i, spec := range traceSpecs {
-		wg.Add(1)
-		go func(index int, traceSpec TraceSpec) {
-			defer wg.Done()
+		a.logger.Debug("Starting trace %d: %s", i, spec.Target)

-			// Acquire semaphore
-			semaphore <- struct{}{}
-			defer func() { <-semaphore }()
-
-			result := a.executeSingleBCCTrace(index, traceSpec)
-			resultsChan <- result
-		}(i, spec)
-	}
-
-	// Wait for all traces to complete
-	go func() {
-		wg.Wait()
-		close(resultsChan)
-	}()
-
-	// Collect all results
-	var allResults []map[string]interface{}
-	for result := range resultsChan {
-		allResults = append(allResults, result)
-	}
-
-	if a.config.CollectiveResults {
-		logging.Debug("All %d BCC traces completed. Sending collective results to API layer", len(allResults))
-	}
-
-	return allResults
-}
-
-// executeSingleBCCTrace executes a single BCC trace and returns the result
-func (a *LinuxDiagnosticAgent) executeSingleBCCTrace(index int, spec TraceSpec) map[string]interface{} {
-	result := map[string]interface{}{
-		"index":      index,
-		"target":     spec.Target,
-		"probe_type": spec.ProbeType,
-		"success":    false,
-		"error":      "",
-		"start_time": time.Now().Format(time.RFC3339),
-	}
-
-	logging.Debug("[Task %d] Starting BCC trace: %s (type: %s)", index, spec.Target, spec.ProbeType)
-
-	// Start the trace
-	traceID, err := a.ebpfManager.StartTrace(spec)
-	if err != nil {
-		result["error"] = fmt.Sprintf("Failed to start trace: %v", err)
-		logging.Error("[Task %d] Failed to start trace %s: %v", index, spec.Target, err)
-		return result
-	}
-
-	result["trace_id"] = traceID
-	logging.Debug("[Task %d] Trace %s started with ID: %s", index, spec.Target, traceID)
-
-	// Wait for the trace duration
-	time.Sleep(time.Duration(spec.Duration) * time.Second)
-
-	// Get the trace result
-	traceResult, err := a.ebpfManager.GetTraceResult(traceID)
-	if err != nil {
-		// Try to stop the trace if it's still running
-		a.ebpfManager.StopTrace(traceID)
-		result["error"] = fmt.Sprintf("Failed to get trace results: %v", err)
-		logging.Error("[Task %d] Failed to get results for trace %s: %v", index, spec.Target, err)
-		return result
-	}
-
-	// Populate result with trace data
-	result["success"] = true
-	result["end_time"] = time.Now().Format(time.RFC3339)
-	result["event_count"] = traceResult.EventCount
-	result["events_per_second"] = traceResult.Statistics.EventsPerSecond
-	result["duration"] = traceResult.EndTime.Sub(traceResult.StartTime).Seconds()
-	result["summary"] = traceResult.Summary
-
-	// Include sample events (limit to avoid large payloads)
-	maxSampleEvents := 10
-	if len(traceResult.Events) > 0 {
-		sampleCount := len(traceResult.Events)
-		if sampleCount > maxSampleEvents {
-			sampleCount = maxSampleEvents
-		}
-
-		sampleEvents := make([]map[string]interface{}, sampleCount)
-		for i := 0; i < sampleCount; i++ {
-			event := traceResult.Events[i]
-			sampleEvents[i] = map[string]interface{}{
-				"pid":          event.PID,
-				"tid":          event.TID,
-				"process_name": event.ProcessName,
-				"message":      event.Message,
-				"timestamp":    event.Timestamp,
+		// Start the trace
+		traceID, err := a.ebpfManager.StartTrace(spec)
+		if err != nil {
+			a.logger.Error("Failed to start trace %d: %v", i, err)
+			result := map[string]interface{}{
+				"index":   i,
+				"target":  spec.Target,
+				"success": false,
+				"error":   err.Error(),
 			}
+			results = append(results, result)
+			continue
 		}
-		result["sample_events"] = sampleEvents
-	}

-	// Include top processes
-	if len(traceResult.Statistics.TopProcesses) > 0 {
-		topProcesses := make([]map[string]interface{}, len(traceResult.Statistics.TopProcesses))
-		for i, proc := range traceResult.Statistics.TopProcesses {
-			topProcesses[i] = map[string]interface{}{
-				"process_name": proc.ProcessName,
-				"event_count":  proc.EventCount,
-				"percentage":   proc.Percentage,
+		// Wait for the trace duration
+		time.Sleep(time.Duration(spec.Duration) * time.Second)
+
+		// Get the trace result
+		traceResult, err := a.ebpfManager.GetTraceResult(traceID)
+		if err != nil {
+			a.logger.Error("Failed to get results for trace %d: %v", i, err)
+			result := map[string]interface{}{
+				"index":   i,
+				"target":  spec.Target,
+				"success": false,
+				"error":   err.Error(),
 			}
+			results = append(results, result)
+			continue
 		}
-		result["top_processes"] = topProcesses
+
+		// Build successful result
+		result := map[string]interface{}{
+			"index":             i,
+			"target":            spec.Target,
+			"success":           true,
+			"event_count":       traceResult.EventCount,
+			"events_per_second": traceResult.Statistics.EventsPerSecond,
+			"duration":          traceResult.EndTime.Sub(traceResult.StartTime).Seconds(),
+			"summary":           traceResult.Summary,
+		}
+		results = append(results, result)
+
+		a.logger.Debug("Completed trace %d: %d events", i, traceResult.EventCount)
 	}

-	logging.Debug("[Task %d] Trace %s completed: %d events (%.2f events/sec)",
-		index, spec.Target, traceResult.EventCount, traceResult.Statistics.EventsPerSecond)
-
-	return result
+	a.logger.Info("Completed %d eBPF traces", len(results))
+	return results
 }