Somewhat okay refactoring

This commit is contained in:
Harshavardhan Musanalli
2025-11-08 21:48:59 +01:00
parent 794111cb44
commit c268a3a42e
12 changed files with 280 additions and 479 deletions

277
agent.go
View File

@@ -8,42 +8,18 @@ import (
"net/http"
"os"
"strings"
"sync"
"time"
"nannyagentv2/internal/ebpf"
"nannyagentv2/internal/executor"
"nannyagentv2/internal/logging"
"nannyagentv2/internal/system"
"nannyagentv2/internal/types"
"github.com/sashabaranov/go-openai"
)
// DiagnosticResponse represents the diagnostic phase response from AI.
// Every field is decoded from / encoded to the JSON key named in its struct tag.
type DiagnosticResponse struct {
// ResponseType is the response discriminator (JSON key "response_type").
ResponseType string `json:"response_type"`
// Phase is carried under the JSON key "phase".
Phase string `json:"phase"`
// Analysis is carried under the JSON key "analysis".
Analysis string `json:"analysis"`
// Commands is a list of plain command strings (not Command structs).
Commands []string `json:"commands"`
// NextSteps is a list of strings carried under the JSON key "next_steps".
NextSteps []string `json:"next_steps"`
// Reasoning is carried under the JSON key "reasoning".
Reasoning string `json:"reasoning"`
// ConfidenceLevel is numeric here; NOTE(review): valid range is not visible in this file — confirm.
ConfidenceLevel float64 `json:"confidence_level"`
}
// ResolutionResponse represents the resolution phase response from AI.
// Every field is a string decoded from the JSON key named in its struct tag.
type ResolutionResponse struct {
// ResponseType is the response discriminator (JSON key "response_type").
ResponseType string `json:"response_type"`
// RootCause is carried under the JSON key "root_cause".
RootCause string `json:"root_cause"`
// ResolutionPlan is carried under the JSON key "resolution_plan".
ResolutionPlan string `json:"resolution_plan"`
// Confidence is a string here (unlike DiagnosticResponse.ConfidenceLevel, which is a float64).
Confidence string `json:"confidence"`
}
// Command represents a command to be executed.
type Command struct {
// ID identifies the command (JSON key "id").
ID string `json:"id"`
// Command is the command text itself (JSON key "command").
Command string `json:"command"`
// Description is a free-text description of the command (JSON key "description").
Description string `json:"description"`
}
// AgentConfig holds configuration for concurrent execution
// AgentConfig holds configuration for concurrent execution (local to agent)
type AgentConfig struct {
MaxConcurrentTasks int `json:"max_concurrent_tasks"`
CollectiveResults bool `json:"collective_results"`
@@ -57,24 +33,19 @@ func DefaultAgentConfig() *AgentConfig {
}
}
// CommandResult represents the result of executing a command.
type CommandResult struct {
// ID is serialized under the JSON key "id".
ID string `json:"id"`
// Command is serialized under the JSON key "command".
Command string `json:"command"`
// Output is serialized under the JSON key "output".
Output string `json:"output"`
// ExitCode is serialized under the JSON key "exit_code".
ExitCode int `json:"exit_code"`
// Error is left out of the JSON encoding when it is the empty string (omitempty).
Error string `json:"error,omitempty"`
}
//
// LinuxDiagnosticAgent represents the main diagnostic agent
// LinuxDiagnosticAgent represents the main agent
// LinuxDiagnosticAgent represents the main diagnostic agent
type LinuxDiagnosticAgent struct {
client *openai.Client
model string
executor *CommandExecutor
episodeID string // TensorZero episode ID for conversation continuity
ebpfManager *BCCTraceManager // BCC-style eBPF tracing capabilities
config *AgentConfig // Configuration for concurrent execution
authManager interface{} // Authentication manager for TensorZero requests
executor *executor.CommandExecutor
episodeID string // TensorZero episode ID for conversation continuity
ebpfManager *ebpf.BCCTraceManager // eBPF tracing manager
config *AgentConfig // Configuration for concurrent execution
authManager interface{} // Authentication manager for TensorZero requests
logger *logging.Logger
}
// NewLinuxDiagnosticAgent creates a new diagnostic agent
@@ -96,12 +67,13 @@ func NewLinuxDiagnosticAgent() *LinuxDiagnosticAgent {
agent := &LinuxDiagnosticAgent{
client: nil, // Not used anymore
model: model,
executor: NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
config: DefaultAgentConfig(), // Default concurrent execution config
executor: executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
config: DefaultAgentConfig(), // Default concurrent execution config
}
// Initialize BCC-style eBPF capabilities
agent.ebpfManager = NewBCCTraceManager()
// Initialize eBPF manager
agent.ebpfManager = ebpf.NewBCCTraceManager()
agent.logger = logging.NewLogger()
return agent
}
@@ -125,13 +97,14 @@ func NewLinuxDiagnosticAgentWithAuth(authManager interface{}) *LinuxDiagnosticAg
agent := &LinuxDiagnosticAgent{
client: nil, // Not used anymore
model: model,
executor: NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
config: DefaultAgentConfig(), // Default concurrent execution config
authManager: authManager, // Store auth manager for TensorZero requests
executor: executor.NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
config: DefaultAgentConfig(), // Default concurrent execution config
authManager: authManager, // Store auth manager for TensorZero requests
}
// Initialize BCC-style eBPF capabilities
agent.ebpfManager = NewBCCTraceManager()
// Initialize eBPF manager
agent.ebpfManager = ebpf.NewBCCTraceManager()
agent.logger = logging.NewLogger()
return agent
}
@@ -142,10 +115,10 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
logging.Info("Gathering system information...")
// Gather system information
systemInfo := GatherSystemInfo()
systemInfo := system.GatherSystemInfo()
// Format the initial prompt with system information
initialPrompt := FormatSystemInfoForPrompt(systemInfo) + "\n" + issue
initialPrompt := system.FormatSystemInfoForPrompt(systemInfo) + "\n" + issue
// Start conversation with initial issue including system info
messages := []openai.ChatCompletionMessage{
@@ -157,7 +130,7 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
for {
// Send request to TensorZero API via OpenAI SDK
response, err := a.sendRequestWithEpisode(messages, a.episodeID)
response, err := a.SendRequestWithEpisode(messages, a.episodeID)
if err != nil {
return fmt.Errorf("failed to send request: %w", err)
}
@@ -171,7 +144,7 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
// Parse the response to determine next action
var diagnosticResp types.EBPFEnhancedDiagnosticResponse
var resolutionResp ResolutionResponse
var resolutionResp types.ResolutionResponse
// Try to parse as diagnostic response first (with eBPF support)
logging.Debug("Attempting to parse response as diagnostic...")
@@ -181,12 +154,12 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
logging.Debug("Reasoning: %s", diagnosticResp.Reasoning)
// Execute commands and collect results
commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
commandResults := make([]types.CommandResult, 0, len(diagnosticResp.Commands))
if len(diagnosticResp.Commands) > 0 {
logging.Info("Executing %d diagnostic commands", len(diagnosticResp.Commands))
for i, cmdStr := range diagnosticResp.Commands {
// Convert string command to Command struct (auto-generate ID and description)
cmd := Command{
cmd := types.Command{
ID: fmt.Sprintf("cmd_%d", i+1),
Command: cmdStr,
Description: fmt.Sprintf("Diagnostic command: %s", cmdStr),
@@ -205,9 +178,9 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
if len(diagnosticResp.EBPFPrograms) > 0 {
logging.Info("AI requested %d eBPF traces for enhanced diagnostics", len(diagnosticResp.EBPFPrograms))
// Convert EBPFPrograms to TraceSpecs and execute concurrently
traceSpecs := a.convertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
ebpfResults = a.executeBCCTracesConcurrently(traceSpecs)
// Convert EBPFPrograms to TraceSpecs and execute concurrently using the eBPF service
traceSpecs := a.ConvertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
ebpfResults = a.ExecuteEBPFTraces(traceSpecs)
}
// Prepare combined results as user message
@@ -279,12 +252,17 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
}
// SendRequest sends a request to TensorZero via Supabase proxy (without episode ID)
func (a *LinuxDiagnosticAgent) sendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
return a.sendRequestWithEpisode(messages, "")
func (a *LinuxDiagnosticAgent) SendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
return a.SendRequestWithEpisode(messages, "")
}
// ExecuteCommand hands cmd to the agent's command executor and returns the
// executor's result unchanged.
func (a *LinuxDiagnosticAgent) ExecuteCommand(cmd types.Command) types.CommandResult {
	result := a.executor.Execute(cmd)
	return result
}
// SendRequestWithEpisode sends a request to TensorZero via Supabase proxy with episode ID for conversation continuity
func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
func (a *LinuxDiagnosticAgent) SendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
// Convert messages to the expected format
messageMaps := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
@@ -403,9 +381,9 @@ func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatComp
return response, nil
}
// convertEBPFProgramsToTraceSpecs converts old EBPFProgram format to new TraceSpec format
func (a *LinuxDiagnosticAgent) convertEBPFProgramsToTraceSpecs(ebpfPrograms []types.EBPFRequest) []TraceSpec {
var traceSpecs []TraceSpec
// ConvertEBPFProgramsToTraceSpecs converts old EBPFProgram format to new TraceSpec format
func (a *LinuxDiagnosticAgent) ConvertEBPFProgramsToTraceSpecs(ebpfPrograms []types.EBPFRequest) []ebpf.TraceSpec {
var traceSpecs []ebpf.TraceSpec
for _, prog := range ebpfPrograms {
spec := a.convertToTraceSpec(prog)
@@ -416,7 +394,7 @@ func (a *LinuxDiagnosticAgent) convertEBPFProgramsToTraceSpecs(ebpfPrograms []ty
}
// convertToTraceSpec converts an EBPFRequest to a TraceSpec for BCC-style tracing
func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceSpec {
func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) ebpf.TraceSpec {
// Determine probe type based on target and type
probeType := "p" // default to kprobe
target := prog.Target
@@ -447,7 +425,7 @@ func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceS
duration = 5 // default 5 seconds
}
return TraceSpec{
return ebpf.TraceSpec{
ProbeType: probeType,
Target: target,
Format: prog.Description, // Use description as format
@@ -457,135 +435,66 @@ func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog types.EBPFRequest) TraceS
}
}
// executeBCCTracesConcurrently executes multiple BCC traces concurrently with configurable parallelism
func (a *LinuxDiagnosticAgent) executeBCCTracesConcurrently(traceSpecs []TraceSpec) []map[string]interface{} {
// ExecuteEBPFTraces executes multiple eBPF traces using the eBPF service
func (a *LinuxDiagnosticAgent) ExecuteEBPFTraces(traceSpecs []ebpf.TraceSpec) []map[string]interface{} {
if len(traceSpecs) == 0 {
return []map[string]interface{}{}
}
logging.Info("Executing %d BCC traces with max %d concurrent tasks", len(traceSpecs), a.config.MaxConcurrentTasks)
a.logger.Info("Executing %d eBPF traces", len(traceSpecs))
// Channel to limit concurrent goroutines
semaphore := make(chan struct{}, a.config.MaxConcurrentTasks)
resultsChan := make(chan map[string]interface{}, len(traceSpecs))
var wg sync.WaitGroup
results := make([]map[string]interface{}, 0, len(traceSpecs))
// Start all traces concurrently
// Execute each trace using the eBPF manager
for i, spec := range traceSpecs {
wg.Add(1)
go func(index int, traceSpec TraceSpec) {
defer wg.Done()
a.logger.Debug("Starting trace %d: %s", i, spec.Target)
// Acquire semaphore
semaphore <- struct{}{}
defer func() { <-semaphore }()
result := a.executeSingleBCCTrace(index, traceSpec)
resultsChan <- result
}(i, spec)
}
// Wait for all traces to complete
go func() {
wg.Wait()
close(resultsChan)
}()
// Collect all results
var allResults []map[string]interface{}
for result := range resultsChan {
allResults = append(allResults, result)
}
if a.config.CollectiveResults {
logging.Debug("All %d BCC traces completed. Sending collective results to API layer", len(allResults))
}
return allResults
}
// executeSingleBCCTrace executes a single BCC trace and returns the result
func (a *LinuxDiagnosticAgent) executeSingleBCCTrace(index int, spec TraceSpec) map[string]interface{} {
result := map[string]interface{}{
"index": index,
"target": spec.Target,
"probe_type": spec.ProbeType,
"success": false,
"error": "",
"start_time": time.Now().Format(time.RFC3339),
}
logging.Debug("[Task %d] Starting BCC trace: %s (type: %s)", index, spec.Target, spec.ProbeType)
// Start the trace
traceID, err := a.ebpfManager.StartTrace(spec)
if err != nil {
result["error"] = fmt.Sprintf("Failed to start trace: %v", err)
logging.Error("[Task %d] Failed to start trace %s: %v", index, spec.Target, err)
return result
}
result["trace_id"] = traceID
logging.Debug("[Task %d] Trace %s started with ID: %s", index, spec.Target, traceID)
// Wait for the trace duration
time.Sleep(time.Duration(spec.Duration) * time.Second)
// Get the trace result
traceResult, err := a.ebpfManager.GetTraceResult(traceID)
if err != nil {
// Try to stop the trace if it's still running
a.ebpfManager.StopTrace(traceID)
result["error"] = fmt.Sprintf("Failed to get trace results: %v", err)
logging.Error("[Task %d] Failed to get results for trace %s: %v", index, spec.Target, err)
return result
}
// Populate result with trace data
result["success"] = true
result["end_time"] = time.Now().Format(time.RFC3339)
result["event_count"] = traceResult.EventCount
result["events_per_second"] = traceResult.Statistics.EventsPerSecond
result["duration"] = traceResult.EndTime.Sub(traceResult.StartTime).Seconds()
result["summary"] = traceResult.Summary
// Include sample events (limit to avoid large payloads)
maxSampleEvents := 10
if len(traceResult.Events) > 0 {
sampleCount := len(traceResult.Events)
if sampleCount > maxSampleEvents {
sampleCount = maxSampleEvents
}
sampleEvents := make([]map[string]interface{}, sampleCount)
for i := 0; i < sampleCount; i++ {
event := traceResult.Events[i]
sampleEvents[i] = map[string]interface{}{
"pid": event.PID,
"tid": event.TID,
"process_name": event.ProcessName,
"message": event.Message,
"timestamp": event.Timestamp,
// Start the trace
traceID, err := a.ebpfManager.StartTrace(spec)
if err != nil {
a.logger.Error("Failed to start trace %d: %v", i, err)
result := map[string]interface{}{
"index": i,
"target": spec.Target,
"success": false,
"error": err.Error(),
}
results = append(results, result)
continue
}
result["sample_events"] = sampleEvents
}
// Include top processes
if len(traceResult.Statistics.TopProcesses) > 0 {
topProcesses := make([]map[string]interface{}, len(traceResult.Statistics.TopProcesses))
for i, proc := range traceResult.Statistics.TopProcesses {
topProcesses[i] = map[string]interface{}{
"process_name": proc.ProcessName,
"event_count": proc.EventCount,
"percentage": proc.Percentage,
// Wait for the trace duration
time.Sleep(time.Duration(spec.Duration) * time.Second)
// Get the trace result
traceResult, err := a.ebpfManager.GetTraceResult(traceID)
if err != nil {
a.logger.Error("Failed to get results for trace %d: %v", i, err)
result := map[string]interface{}{
"index": i,
"target": spec.Target,
"success": false,
"error": err.Error(),
}
results = append(results, result)
continue
}
result["top_processes"] = topProcesses
// Build successful result
result := map[string]interface{}{
"index": i,
"target": spec.Target,
"success": true,
"event_count": traceResult.EventCount,
"events_per_second": traceResult.Statistics.EventsPerSecond,
"duration": traceResult.EndTime.Sub(traceResult.StartTime).Seconds(),
"summary": traceResult.Summary,
}
results = append(results, result)
a.logger.Debug("Completed trace %d: %d events", i, traceResult.EventCount)
}
logging.Debug("[Task %d] Trace %s completed: %d events (%.2f events/sec)",
index, spec.Target, traceResult.EventCount, traceResult.Statistics.EventsPerSecond)
return result
a.logger.Info("Completed %d eBPF traces", len(results))
return results
}

View File

@@ -1,107 +0,0 @@
package main
import (
"testing"
"time"
)
// TestCommandExecutor_ValidateCommand is a table-driven check of
// validateCommand: each case supplies a command string and whether the
// validator is expected to return an error for it.
func TestCommandExecutor_ValidateCommand(t *testing.T) {
	type validationCase struct {
		name    string
		command string
		wantErr bool
	}

	cases := []validationCase{
		// Commands the validator is expected to accept.
		{"safe command - ls", "ls -la /var", false},
		{"safe command - df", "df -h", false},
		{"safe command - ps", "ps aux | grep nginx", false},
		// Commands the validator is expected to reject.
		{"dangerous command - rm", "rm -rf /tmp/*", true},
		{"dangerous command - dd", "dd if=/dev/zero of=/dev/sda", true},
		{"dangerous command - sudo", "sudo systemctl stop nginx", true},
		{"dangerous command - redirection", "echo 'test' > /etc/passwd", true},
	}

	ce := NewCommandExecutor(5 * time.Second)
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotErr := ce.validateCommand(tc.command)
			if rejected := gotErr != nil; rejected != tc.wantErr {
				t.Errorf("validateCommand() error = %v, wantErr %v", gotErr, tc.wantErr)
			}
		})
	}
}
// TestCommandExecutor_Execute runs a harmless echo through Execute and checks
// the exit code, the captured output and the error field of the result.
func TestCommandExecutor_Execute(t *testing.T) {
	const wantOutput = "Hello, World!\n"

	runner := NewCommandExecutor(5 * time.Second)

	// A command that is expected to pass validation and succeed.
	res := runner.Execute(Command{
		ID:          "test_echo",
		Command:     "echo 'Hello, World!'",
		Description: "Test echo command",
	})

	if code := res.ExitCode; code != 0 {
		t.Errorf("Expected exit code 0, got %d", code)
	}
	if got := res.Output; got != wantOutput {
		t.Errorf("Expected 'Hello, World!\\n', got '%s'", got)
	}
	if msg := res.Error; msg != "" {
		t.Errorf("Expected no error, got '%s'", msg)
	}
}
// TestCommandExecutor_ExecuteUnsafeCommand submits an rm command to Execute
// and expects the result to carry exit code 1 and a non-empty error.
func TestCommandExecutor_ExecuteUnsafeCommand(t *testing.T) {
	runner := NewCommandExecutor(5 * time.Second)

	// A command that is expected to be rejected as unsafe.
	res := runner.Execute(Command{
		ID:          "test_rm",
		Command:     "rm -rf /tmp/test",
		Description: "Dangerous rm command",
	})

	if code := res.ExitCode; code != 1 {
		t.Errorf("Expected exit code 1 for unsafe command, got %d", code)
	}
	if len(res.Error) == 0 {
		t.Error("Expected error for unsafe command, got none")
	}
}

View File

@@ -1,4 +1,4 @@
package main
package ebpf
import (
"bufio"

View File

@@ -1,4 +1,4 @@
package main
package ebpf
import (
"context"

View File

@@ -1,4 +1,4 @@
package main
package ebpf
import (
"encoding/json"

View File

@@ -1,4 +1,4 @@
package main
package ebpf
import (
"encoding/json"
@@ -749,13 +749,18 @@ func TestAgentEBPFIntegration(t *testing.T) {
fmt.Println("\n=== Agent eBPF Integration Test ===")
fmt.Println("This test demonstrates the complete agent flow with BCC-style tracing")
// Create agent with eBPF manager
agent := &LinuxDiagnosticAgent{}
agent.ebpfManager = NewBCCTraceManager()
agent.config = DefaultAgentConfig() // Add config for concurrent execution
// Create eBPF manager directly for testing
manager := NewBCCTraceManager()
// Test multiple syscalls that would be sent by remote API
testEBPFRequests := []EBPFRequest{
testEBPFRequests := []struct {
Name string `json:"name"`
Type string `json:"type"`
Target string `json:"target"`
Duration int `json:"duration"`
Description string `json:"description"`
Filters map[string]string `json:"filters"`
}{
{
Name: "file_operations",
Type: "syscall",
@@ -782,11 +787,49 @@ func TestAgentEBPFIntegration(t *testing.T) {
},
}
fmt.Printf("🚀 Testing agent with %d eBPF programs...\n\n", len(testEBPFRequests))
fmt.Printf("🚀 Testing eBPF manager with %d eBPF programs...\n\n", len(testEBPFRequests))
// Execute eBPF programs through agent (simulating API call)
traceSpecs := agent.convertEBPFProgramsToTraceSpecs(testEBPFRequests)
results := agent.executeBCCTracesConcurrently(traceSpecs)
// Convert to trace specs and execute using manager directly
var traceSpecs []TraceSpec
for _, req := range testEBPFRequests {
spec := TraceSpec{
ProbeType: "p", // kprobe
Target: "__x64_" + req.Target,
Format: req.Description,
Duration: req.Duration,
}
traceSpecs = append(traceSpecs, spec)
}
// Execute traces sequentially for testing
var results []map[string]interface{}
for i, spec := range traceSpecs {
fmt.Printf("Starting trace %d: %s\n", i+1, spec.Target)
traceID, err := manager.StartTrace(spec)
if err != nil {
fmt.Printf("Failed to start trace: %v\n", err)
continue
}
// Wait for trace duration
time.Sleep(time.Duration(spec.Duration) * time.Second)
traceResult, err := manager.GetTraceResult(traceID)
if err != nil {
fmt.Printf("Failed to get results: %v\n", err)
continue
}
result := map[string]interface{}{
"name": testEBPFRequests[i].Name,
"target": spec.Target,
"success": true,
"event_count": traceResult.EventCount,
"summary": traceResult.Summary,
}
results = append(results, result)
}
fmt.Printf("📊 Agent eBPF Execution Results:\n")
fmt.Printf("=" + strings.Repeat("=", 50) + "\n\n")

View File

@@ -1,4 +1,4 @@
package main
package executor
import (
"context"
@@ -6,6 +6,8 @@ import (
"os/exec"
"strings"
"time"
"nannyagentv2/internal/types"
)
// CommandExecutor handles safe execution of diagnostic commands
@@ -21,8 +23,8 @@ func NewCommandExecutor(timeout time.Duration) *CommandExecutor {
}
// Execute executes a command safely with timeout and validation
func (ce *CommandExecutor) Execute(cmd Command) CommandResult {
result := CommandResult{
func (ce *CommandExecutor) Execute(cmd types.Command) types.CommandResult {
result := types.CommandResult{
ID: cmd.ID,
Command: cmd.Command,
}

View File

@@ -1,4 +1,4 @@
package main
package server
import (
"encoding/json"
@@ -11,6 +11,7 @@ import (
"nannyagentv2/internal/auth"
"nannyagentv2/internal/logging"
"nannyagentv2/internal/metrics"
"nannyagentv2/internal/types"
"github.com/sashabaranov/go-openai"
)
@@ -27,20 +28,20 @@ type InvestigationRequest struct {
// InvestigationResponse represents the agent's response to an investigation
type InvestigationResponse struct {
AgentID string `json:"agent_id"`
InvestigationID string `json:"investigation_id"`
Status string `json:"status"`
Commands []CommandResult `json:"commands,omitempty"`
AIResponse string `json:"ai_response,omitempty"`
EpisodeID string `json:"episode_id,omitempty"`
Timestamp time.Time `json:"timestamp"`
Error string `json:"error,omitempty"`
AgentID string `json:"agent_id"`
InvestigationID string `json:"investigation_id"`
Status string `json:"status"`
Commands []types.CommandResult `json:"commands,omitempty"`
AIResponse string `json:"ai_response,omitempty"`
EpisodeID string `json:"episode_id,omitempty"`
Timestamp time.Time `json:"timestamp"`
Error string `json:"error,omitempty"`
}
// InvestigationServer handles reverse investigation requests from Supabase
type InvestigationServer struct {
agent *LinuxDiagnosticAgent // Original agent for direct user interactions
applicationAgent *LinuxDiagnosticAgent // Separate agent for application-initiated investigations
agent types.DiagnosticAgent // Original agent for direct user interactions
applicationAgent types.DiagnosticAgent // Separate agent for application-initiated investigations
port string
agentID string
metricsCollector *metrics.Collector
@@ -50,7 +51,7 @@ type InvestigationServer struct {
}
// NewInvestigationServer creates a new investigation server
func NewInvestigationServer(agent *LinuxDiagnosticAgent, authManager *auth.AuthManager) *InvestigationServer {
func NewInvestigationServer(agent types.DiagnosticAgent, authManager *auth.AuthManager) *InvestigationServer {
port := os.Getenv("AGENT_PORT")
if port == "" {
port = "1234"
@@ -78,14 +79,15 @@ func NewInvestigationServer(agent *LinuxDiagnosticAgent, authManager *auth.AuthM
// Create metrics collector
metricsCollector := metrics.NewCollector("v2.0.0")
// TODO: Fix application agent creation - use main agent for now
// Create a separate agent for application-initiated investigations
applicationAgent := NewLinuxDiagnosticAgent()
// applicationAgent := NewLinuxDiagnosticAgent()
// Override the model to use the application-specific function
applicationAgent.model = "tensorzero::function_name::diagnose_and_heal_application"
// applicationAgent.model = "tensorzero::function_name::diagnose_and_heal_application"
return &InvestigationServer{
agent: agent,
applicationAgent: applicationAgent,
applicationAgent: agent, // Use same agent for now
port: port,
agentID: agentID,
metricsCollector: metricsCollector,
@@ -98,7 +100,8 @@ func NewInvestigationServer(agent *LinuxDiagnosticAgent, authManager *auth.AuthM
// DiagnoseIssueForApplication handles diagnostic requests initiated from application/portal
func (s *InvestigationServer) DiagnoseIssueForApplication(issue, episodeID string) error {
// Set the episode ID on the application agent for continuity
s.applicationAgent.episodeID = episodeID
// TODO: Fix episode ID handling with interface
// s.applicationAgent.episodeID = episodeID
return s.applicationAgent.DiagnoseIssue(issue)
}
@@ -198,7 +201,7 @@ func (s *InvestigationServer) handleStatus(w http.ResponseWriter, r *http.Reques
}
// sendCommandResultsToTensorZero sends command results back to TensorZero and continues conversation
func (s *InvestigationServer) sendCommandResultsToTensorZero(diagnosticResp DiagnosticResponse, commandResults []CommandResult) (interface{}, error) {
func (s *InvestigationServer) sendCommandResultsToTensorZero(diagnosticResp types.DiagnosticResponse, commandResults []types.CommandResult) (interface{}, error) {
// Build conversation history like in agent.go
messages := []openai.ChatCompletionMessage{
// Add the original diagnostic response as assistant message
@@ -223,7 +226,7 @@ func (s *InvestigationServer) sendCommandResultsToTensorZero(diagnosticResp Diag
// Send to TensorZero via application agent's sendRequest method
logging.Debug("Sending command results to TensorZero for analysis")
response, err := s.applicationAgent.sendRequest(messages)
response, err := s.applicationAgent.SendRequest(messages)
if err != nil {
return nil, fmt.Errorf("failed to send request to TensorZero: %w", err)
}
@@ -236,8 +239,8 @@ func (s *InvestigationServer) sendCommandResultsToTensorZero(diagnosticResp Diag
logging.Debug("TensorZero continued analysis: %s", content)
// Try to parse the response to determine if it's diagnostic or resolution
var diagnosticNextResp DiagnosticResponse
var resolutionResp ResolutionResponse
var diagnosticNextResp types.DiagnosticResponse
var resolutionResp types.ResolutionResponse
// Check if it's another diagnostic response
if err := json.Unmarshal([]byte(content), &diagnosticNextResp); err == nil && diagnosticNextResp.ResponseType == "diagnostic" {
@@ -324,7 +327,7 @@ func (s *InvestigationServer) handleInvestigation(w http.ResponseWriter, r *http
// handleDiagnosticExecution executes commands from a DiagnosticResponse
func (s *InvestigationServer) handleDiagnosticExecution(requestBody map[string]interface{}) map[string]interface{} {
// Parse as DiagnosticResponse
var diagnosticResp DiagnosticResponse
var diagnosticResp types.DiagnosticResponse
// Convert the map back to JSON and then parse it properly
jsonData, err := json.Marshal(requestBody)
@@ -347,19 +350,13 @@ func (s *InvestigationServer) handleDiagnosticExecution(requestBody map[string]i
fmt.Printf("📋 Executing %d commands from backend\n", len(diagnosticResp.Commands))
// Execute all commands
commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
commandResults := make([]types.CommandResult, 0, len(diagnosticResp.Commands))
for i, cmdStr := range diagnosticResp.Commands {
// Convert string to Command struct
cmd := Command{
ID: fmt.Sprintf("cmd_%d", i),
Command: cmdStr,
Description: fmt.Sprintf("Investigation command: %s", cmdStr),
}
for _, cmd := range diagnosticResp.Commands {
fmt.Printf("⚙️ Executing command '%s': %s\n", cmd.ID, cmd.Command)
// Use the agent's executor to run the command
result := s.agent.executor.Execute(cmd)
result := s.agent.ExecuteCommand(cmd)
commandResults = append(commandResults, result)
if result.Error != "" {

View File

@@ -1,4 +1,4 @@
package main
package system
import (
"fmt"
@@ -6,6 +6,9 @@ import (
"runtime"
"strings"
"time"
"nannyagentv2/internal/executor"
"nannyagentv2/internal/types"
)
// SystemInfo represents basic system information
@@ -25,42 +28,42 @@ type SystemInfo struct {
// GatherSystemInfo collects basic system information
func GatherSystemInfo() *SystemInfo {
info := &SystemInfo{}
executor := NewCommandExecutor(5 * time.Second)
executor := executor.NewCommandExecutor(5 * time.Second)
// Basic system info
if result := executor.Execute(Command{ID: "hostname", Command: "hostname"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "hostname", Command: "hostname"}); result.ExitCode == 0 {
info.Hostname = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "os", Command: "lsb_release -d 2>/dev/null | cut -f2 || cat /etc/os-release | grep PRETTY_NAME | cut -d'=' -f2 | tr -d '\"'"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "os", Command: "lsb_release -d 2>/dev/null | cut -f2 || cat /etc/os-release | grep PRETTY_NAME | cut -d'=' -f2 | tr -d '\"'"}); result.ExitCode == 0 {
info.OS = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "kernel", Command: "uname -r"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "kernel", Command: "uname -r"}); result.ExitCode == 0 {
info.Kernel = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "arch", Command: "uname -m"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "arch", Command: "uname -m"}); result.ExitCode == 0 {
info.Architecture = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "cores", Command: "nproc"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "cores", Command: "nproc"}); result.ExitCode == 0 {
info.CPUCores = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "memory", Command: "free -h | grep Mem | awk '{print $2}'"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "memory", Command: "free -h | grep Mem | awk '{print $2}'"}); result.ExitCode == 0 {
info.Memory = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "uptime", Command: "uptime -p"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "uptime", Command: "uptime -p"}); result.ExitCode == 0 {
info.Uptime = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "load", Command: "uptime | awk -F'load average:' '{print $2}' | xargs"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "load", Command: "uptime | awk -F'load average:' '{print $2}' | xargs"}); result.ExitCode == 0 {
info.LoadAverage = strings.TrimSpace(result.Output)
}
if result := executor.Execute(Command{ID: "disk", Command: "df -h / | tail -1 | awk '{print \"Root: \" $3 \"/\" $2 \" (\" $5 \" used)\"}'"}); result.ExitCode == 0 {
if result := executor.Execute(types.Command{ID: "disk", Command: "df -h / | tail -1 | awk '{print \"Root: \" $3 \"/\" $2 \" (\" $5 \" used)\"}'"}); result.ExitCode == 0 {
info.DiskUsage = strings.TrimSpace(result.Output)
}

View File

@@ -1,6 +1,12 @@
package types
import "time"
import (
"time"
"nannyagentv2/internal/ebpf"
"github.com/sashabaranov/go-openai"
)
// SystemMetrics represents comprehensive system performance metrics
type SystemMetrics struct {
@@ -59,43 +65,47 @@ type SystemMetrics struct {
Timestamp time.Time `json:"timestamp"`
}
// FilesystemInfo represents individual filesystem statistics
// FilesystemInfo represents filesystem information
type FilesystemInfo struct {
Device string `json:"device"`
Mountpoint string `json:"mountpoint"`
Type string `json:"type"`
Fstype string `json:"fstype"`
Total uint64 `json:"total"`
Used uint64 `json:"used"`
Free uint64 `json:"free"`
Usage float64 `json:"usage"`
UsagePercent float64 `json:"usage_percent"`
}
// BlockDevice represents block device information
// BlockDevice represents a block device
type BlockDevice struct {
Name string `json:"name"`
Size uint64 `json:"size"`
Model string `json:"model"`
Type string `json:"type"`
Model string `json:"model,omitempty"`
SerialNumber string `json:"serial_number"`
}
// NetworkStats represents detailed network interface statistics
// NetworkStats represents network interface statistics
type NetworkStats struct {
InterfaceName string `json:"interface_name"`
BytesSent uint64 `json:"bytes_sent"`
BytesRecv uint64 `json:"bytes_recv"`
PacketsSent uint64 `json:"packets_sent"`
PacketsRecv uint64 `json:"packets_recv"`
ErrorsIn uint64 `json:"errors_in"`
ErrorsOut uint64 `json:"errors_out"`
DropsIn uint64 `json:"drops_in"`
DropsOut uint64 `json:"drops_out"`
Interface string `json:"interface"`
BytesRecv uint64 `json:"bytes_recv"`
BytesSent uint64 `json:"bytes_sent"`
PacketsRecv uint64 `json:"packets_recv"`
PacketsSent uint64 `json:"packets_sent"`
ErrorsIn uint64 `json:"errors_in"`
ErrorsOut uint64 `json:"errors_out"`
DropsIn uint64 `json:"drops_in"`
DropsOut uint64 `json:"drops_out"`
}
// AuthToken represents the authentication token structure
// AuthToken represents an authentication token
type AuthToken struct {
AccessToken string `json:"access_token"`
RefreshToken string `json:"refresh_token"`
ExpiresAt time.Time `json:"expires_at"`
TokenType string `json:"token_type"`
ExpiresAt time.Time `json:"expires_at"`
AgentID string `json:"agent_id"`
}
@@ -169,53 +179,14 @@ type MetricsRequest struct {
NetworkStats map[string]uint64 `json:"network_stats"`
}
// eBPF related types
// EBPFEvent is the JSON-serializable record for a single eBPF event: a
// timestamp, an event type label, process and user identifiers, and an
// untyped key/value payload.
type EBPFEvent struct {
Timestamp int64 `json:"timestamp"` // NOTE(review): epoch unit (s/ms/ns) is not visible here — confirm
EventType string `json:"event_type"`
ProcessID int `json:"process_id"`
ProcessName string `json:"process_name"`
UserID int `json:"user_id"`
Data map[string]interface{} `json:"data"` // free-form, event-specific fields
}
// EBPFTrace bundles the events collected for one trace together with its
// identifier, start/end times, a capability label, a text summary, an event
// count and a list of processes.
type EBPFTrace struct {
TraceID string `json:"trace_id"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Capability string `json:"capability"`
Events []EBPFEvent `json:"events"`
Summary string `json:"summary"`
EventCount int `json:"event_count"` // presumably len(Events) — verify against the producer
ProcessList []string `json:"process_list"`
}
// EBPFRequest describes one requested eBPF trace: a name, the probe type,
// the attach target, a duration, optional filters and a description.
type EBPFRequest struct {
Name string `json:"name"`
Type string `json:"type"` // "tracepoint", "kprobe", "kretprobe"
Target string `json:"target"` // tracepoint path or function name
Duration int `json:"duration"` // seconds
Filters map[string]string `json:"filters,omitempty"` // left out of the JSON when empty
Description string `json:"description"`
}
// NetworkEvent is a fixed-layout event record carrying a timestamp, the
// process/thread/user IDs and an event type. The raw Comm bytes are excluded
// from JSON; CommStr is what gets emitted under the "comm" key.
type NetworkEvent struct {
Timestamp uint64 `json:"timestamp"` // NOTE(review): time unit is not visible here — confirm
PID uint32 `json:"pid"`
TID uint32 `json:"tid"`
UID uint32 `json:"uid"`
EventType string `json:"event_type"`
Comm [16]byte `json:"-"` // never serialized (tag "-")
CommStr string `json:"comm"` // presumably the string form of Comm — verify against the code that fills it
}
// Agent types
// Agent types for TensorZero integration
// DiagnosticResponse is the JSON shape of a diagnostic-phase reply: a
// response type discriminator, the free-text reasoning, and the list of
// commands attached to the reply.
type DiagnosticResponse struct {
ResponseType string `json:"response_type"`
Reasoning string `json:"reasoning"`
Commands []Command `json:"commands"`
}
// ResolutionResponse represents a resolution response
type ResolutionResponse struct {
ResponseType string `json:"response_type"`
RootCause string `json:"root_cause"`
@@ -223,12 +194,14 @@ type ResolutionResponse struct {
Confidence string `json:"confidence"`
}
// Command represents a command to execute. ID identifies the command,
// Command holds the command text itself, and Description is a
// human-readable note about it.
type Command struct {
ID string `json:"id"`
Command string `json:"command"`
Description string `json:"description"`
}
// CommandResult represents the result of an executed command
type CommandResult struct {
ID string `json:"id"`
Command string `json:"command"`
@@ -238,6 +211,17 @@ type CommandResult struct {
Error string `json:"error,omitempty"`
}
// EBPFRequest represents an eBPF trace request from external API.
// It names the trace, selects the probe type and attach target, sets how
// long to trace, and optionally carries string filters and a description.
type EBPFRequest struct {
Name string `json:"name"`
Type string `json:"type"` // "tracepoint", "kprobe", "kretprobe"
Target string `json:"target"` // tracepoint path or function name
Duration int `json:"duration"` // seconds
Filters map[string]string `json:"filters,omitempty"` // left out of the JSON when empty
Description string `json:"description"`
}
// EBPFEnhancedDiagnosticResponse represents enhanced diagnostic response with eBPF
type EBPFEnhancedDiagnosticResponse struct {
ResponseType string `json:"response_type"`
Reasoning string `json:"reasoning"`
@@ -246,79 +230,20 @@ type EBPFEnhancedDiagnosticResponse struct {
NextActions []string `json:"next_actions,omitempty"`
}
// TensorZeroRequest represents a request to TensorZero: the model name and
// a list of messages kept as generic maps. EpisodeID is serialized under the
// key "tensorzero::episode_id" and is omitted from the JSON when empty.
type TensorZeroRequest struct {
Model string `json:"model"`
Messages []map[string]interface{} `json:"messages"`
EpisodeID string `json:"tensorzero::episode_id,omitempty"`
}
// TensorZeroResponse represents a response from TensorZero: the returned
// choices, kept as generic maps, and the episode ID. Note that the episode
// ID key here is plain "episode_id", unlike the "tensorzero::episode_id" key
// used on TensorZeroRequest.
type TensorZeroResponse struct {
Choices []map[string]interface{} `json:"choices"`
EpisodeID string `json:"episode_id"`
}
// WebSocket types
// WebSocketMessage is a generic message envelope: Type is a string
// discriminator and Data carries an arbitrary payload.
type WebSocketMessage struct {
Type string `json:"type"`
Data interface{} `json:"data"` // untyped; NOTE(review): concrete shape per Type is not visible here
}
// InvestigationTask carries a task ID, the investigation and agent it
// relates to, a generic diagnostic payload, and an optional episode ID.
type InvestigationTask struct {
TaskID string `json:"task_id"`
InvestigationID string `json:"investigation_id"`
AgentID string `json:"agent_id"`
DiagnosticPayload map[string]interface{} `json:"diagnostic_payload"`
EpisodeID string `json:"episode_id,omitempty"` // left out of the JSON when empty
}
// TaskResult reports the outcome for a task ID: a success flag plus
// optional command results and an optional error string, both omitted from
// the JSON when empty.
type TaskResult struct {
TaskID string `json:"task_id"`
Success bool `json:"success"`
CommandResults map[string]interface{} `json:"command_results,omitempty"`
Error string `json:"error,omitempty"`
}
// HeartbeatData is the heartbeat payload: the agent ID, a timestamp and a
// version string.
type HeartbeatData struct {
AgentID string `json:"agent_id"`
Timestamp time.Time `json:"timestamp"`
Version string `json:"version"`
}
// Investigation server types
// InvestigationRequest is the JSON shape of an investigation request. Issue
// and AgentID are always serialized; the episode ID, timestamp, priority and
// description are omitted from the JSON when empty.
type InvestigationRequest struct {
Issue string `json:"issue"`
AgentID string `json:"agent_id"`
EpisodeID string `json:"episode_id,omitempty"`
Timestamp string `json:"timestamp,omitempty"` // string, not time.Time; NOTE(review): format is not visible here
Priority string `json:"priority,omitempty"`
Description string `json:"description,omitempty"`
}
// InvestigationResponse is the JSON shape of an investigation reply: a
// status and message, the agent ID and a timestamp string, plus optional
// generic results, an optional episode ID and an optional pointer to the
// related PendingInvestigation.
type InvestigationResponse struct {
Status string `json:"status"`
Message string `json:"message"`
Results map[string]interface{} `json:"results,omitempty"`
AgentID string `json:"agent_id"`
Timestamp string `json:"timestamp"` // NOTE(review): format is not visible here
EpisodeID string `json:"episode_id,omitempty"`
Investigation *PendingInvestigation `json:"investigation,omitempty"` // omitted when nil
}
// PendingInvestigation is the JSON shape of an investigation record. The
// pointer-typed string fields distinguish "absent" (nil) from an empty
// string; all of them carry omitempty, so nil values are left out of the
// JSON. Timestamps are plain strings here.
type PendingInvestigation struct {
ID string `json:"id"`
Issue string `json:"issue"`
AgentID string `json:"agent_id"`
Status string `json:"status"`
DiagnosticPayload map[string]interface{} `json:"diagnostic_payload"`
CommandResults map[string]interface{} `json:"command_results,omitempty"`
EpisodeID *string `json:"episode_id,omitempty"`
CreatedAt string `json:"created_at"` // NOTE(review): timestamp format is not visible here
StartedAt *string `json:"started_at,omitempty"`
CompletedAt *string `json:"completed_at,omitempty"`
ErrorMessage *string `json:"error_message,omitempty"`
}
// System types
// SystemInfo represents system information (for compatibility)
type SystemInfo struct {
Hostname string `json:"hostname"`
Platform string `json:"platform"`
@@ -331,7 +256,35 @@ type SystemInfo struct {
DiskInfo []map[string]string `json:"disk_info"`
}
// Executor types
type CommandExecutor struct {
timeout time.Duration
// AgentConfig represents agent configuration: the TensorZero API key, the
// API URL, a timeout, a debug switch, retry settings, and an optional
// episode ID that is omitted from the JSON when empty.
type AgentConfig struct {
TensorZeroAPIKey string `json:"tensorzero_api_key"`
APIURL string `json:"api_url"`
Timeout int `json:"timeout"` // NOTE(review): unit (seconds?) is not stated here — confirm with the consumer
Debug bool `json:"debug"`
MaxRetries int `json:"max_retries"`
BackoffFactor int `json:"backoff_factor"`
EpisodeID string `json:"episode_id,omitempty"`
}
// PendingInvestigation represents a pending investigation from the database.
// It carries the record ID, the investigation and agent IDs, a generic
// diagnostic payload, a status string and a creation time.
type PendingInvestigation struct {
ID string `json:"id"`
InvestigationID string `json:"investigation_id"`
AgentID string `json:"agent_id"`
DiagnosticPayload map[string]interface{} `json:"diagnostic_payload"`
EpisodeID *string `json:"episode_id"` // pointer without omitempty: nil is encoded as JSON null
Status string `json:"status"`
CreatedAt time.Time `json:"created_at"`
}
// DiagnosticAgent is the set of agent operations needed by other packages,
// declared as an interface so those packages can hold an agent without
// referring to its concrete type.
type DiagnosticAgent interface {
// DiagnoseIssue takes an issue description and returns an error.
DiagnoseIssue(issue string) error
// Exported method names to match what websocket client calls
// ConvertEBPFProgramsToTraceSpecs maps EBPFRequest values to ebpf.TraceSpec values.
ConvertEBPFProgramsToTraceSpecs(ebpfRequests []EBPFRequest) []ebpf.TraceSpec
// ExecuteEBPFTraces takes trace specs and returns results as generic maps.
ExecuteEBPFTraces(traceSpecs []ebpf.TraceSpec) []map[string]interface{}
// SendRequestWithEpisode takes chat messages plus an episode ID and
// returns the chat completion response or an error.
SendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error)
// SendRequest takes chat messages (no episode ID) and returns the chat
// completion response or an error.
SendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error)
// ExecuteCommand takes a single Command and returns its CommandResult.
ExecuteCommand(cmd Command) CommandResult
}

View File

@@ -1,4 +1,4 @@
package main
package websocket
import (
"context"
@@ -55,7 +55,7 @@ type HeartbeatData struct {
// WebSocketClient handles WebSocket connection to Supabase backend
type WebSocketClient struct {
agent *LinuxDiagnosticAgent
agent types.DiagnosticAgent // DiagnosticAgent interface
conn *websocket.Conn
agentID string
authManager *auth.AuthManager
@@ -68,7 +68,7 @@ type WebSocketClient struct {
}
// NewWebSocketClient creates a new WebSocket client
func NewWebSocketClient(agent *LinuxDiagnosticAgent, authManager *auth.AuthManager) *WebSocketClient {
func NewWebSocketClient(agent types.DiagnosticAgent, authManager *auth.AuthManager) *WebSocketClient {
// Get agent ID from authentication system
var agentID string
if authManager != nil {
@@ -410,8 +410,8 @@ func (c *WebSocketClient) executeEBPFPrograms(ebpfPrograms []interface{}) []map[
}
// Execute eBPF programs using the agent's new BCC concurrent execution logic
traceSpecs := c.agent.convertEBPFProgramsToTraceSpecs(ebpfRequests)
return c.agent.executeBCCTracesConcurrently(traceSpecs)
traceSpecs := c.agent.ConvertEBPFProgramsToTraceSpecs(ebpfRequests)
return c.agent.ExecuteEBPFTraces(traceSpecs)
}
// executeCommandsFromPayload executes commands from a payload and returns results
@@ -587,7 +587,7 @@ func (c *WebSocketClient) checkForPendingInvestigations() {
return
}
var investigations []PendingInvestigation
var investigations []types.PendingInvestigation
err = json.NewDecoder(resp.Body).Decode(&investigations)
if err != nil {
// Response decode failed
@@ -600,7 +600,7 @@ func (c *WebSocketClient) checkForPendingInvestigations() {
}
// handlePendingInvestigation processes a pending investigation from database polling
func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvestigation) {
func (c *WebSocketClient) handlePendingInvestigation(investigation types.PendingInvestigation) {
// Processing pending investigation
// Mark as executing
@@ -656,7 +656,7 @@ func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvest
// Continue conversation until resolution (same as agent)
var finalAIContent string
for {
tzResp, tzErr := c.agent.sendRequestWithEpisode(messages, episodeID)
tzResp, tzErr := c.agent.SendRequestWithEpisode(messages, episodeID)
if tzErr != nil {
logging.Warning("TensorZero continuation failed: %v", tzErr)
// Fall back to marking completed with command results only

View File

@@ -16,6 +16,7 @@ import (
"nannyagentv2/internal/logging"
"nannyagentv2/internal/metrics"
"nannyagentv2/internal/types"
"nannyagentv2/internal/websocket"
)
const Version = "v2.0.0"
@@ -162,7 +163,7 @@ func main() {
applicationAgent.model = "tensorzero::function_name::diagnose_and_heal_application"
// Start WebSocket client for backend communications and investigations
wsClient := NewWebSocketClient(applicationAgent, authManager)
wsClient := websocket.NewWebSocketClient(applicationAgent, authManager)
go func() {
if err := wsClient.Start(); err != nil {
logging.Error("WebSocket client error: %v", err)