Agent and websocket investigations work fine
This commit is contained in:
204
agent.go
204
agent.go
@@ -102,7 +102,7 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
|
||||
for {
|
||||
// Send request to TensorZero API via OpenAI SDK
|
||||
response, err := a.sendRequest(messages)
|
||||
response, err := a.sendRequestWithEpisode(messages, a.episodeID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
@@ -115,34 +115,73 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
fmt.Printf("\nAI Response:\n%s\n", content)
|
||||
|
||||
// Parse the response to determine next action
|
||||
var diagnosticResp DiagnosticResponse
|
||||
var diagnosticResp EBPFEnhancedDiagnosticResponse
|
||||
var resolutionResp ResolutionResponse
|
||||
|
||||
// Try to parse as diagnostic response first
|
||||
// Try to parse as diagnostic response first (with eBPF support)
|
||||
if err := json.Unmarshal([]byte(content), &diagnosticResp); err == nil && diagnosticResp.ResponseType == "diagnostic" {
|
||||
// Handle diagnostic phase
|
||||
fmt.Printf("\nReasoning: %s\n", diagnosticResp.Reasoning)
|
||||
|
||||
if len(diagnosticResp.Commands) == 0 {
|
||||
fmt.Println("No commands to execute in diagnostic phase")
|
||||
break
|
||||
}
|
||||
|
||||
// Execute commands and collect results
|
||||
commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
|
||||
for _, cmd := range diagnosticResp.Commands {
|
||||
fmt.Printf("\nExecuting command '%s': %s\n", cmd.ID, cmd.Command)
|
||||
result := a.executor.Execute(cmd)
|
||||
commandResults = append(commandResults, result)
|
||||
if len(diagnosticResp.Commands) > 0 {
|
||||
fmt.Printf("🔧 Executing diagnostic commands...\n")
|
||||
for _, cmd := range diagnosticResp.Commands {
|
||||
fmt.Printf("⚙️ Executing command '%s': %s\n", cmd.ID, cmd.Command)
|
||||
result := a.executor.Execute(cmd)
|
||||
commandResults = append(commandResults, result)
|
||||
|
||||
fmt.Printf("Output:\n%s\n", result.Output)
|
||||
if result.Error != "" {
|
||||
fmt.Printf("Error: %s\n", result.Error)
|
||||
if result.ExitCode == 0 {
|
||||
fmt.Printf("✅ Command '%s' completed successfully\n", cmd.ID)
|
||||
} else {
|
||||
fmt.Printf("❌ Command '%s' failed with exit code %d\n", cmd.ID, result.ExitCode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare command results as user message
|
||||
resultsJSON, err := json.MarshalIndent(commandResults, "", " ")
|
||||
// Execute eBPF programs if present
|
||||
var ebpfResults []map[string]interface{}
|
||||
if len(diagnosticResp.EBPFPrograms) > 0 {
|
||||
fmt.Printf("🔬 Executing %d eBPF programs...\n", len(diagnosticResp.EBPFPrograms))
|
||||
ebpfResults = a.executeEBPFPrograms(diagnosticResp.EBPFPrograms)
|
||||
}
|
||||
|
||||
// Prepare combined results as user message
|
||||
allResults := map[string]interface{}{
|
||||
"command_results": commandResults,
|
||||
"executed_commands": len(commandResults),
|
||||
}
|
||||
|
||||
// Include eBPF results if any were executed
|
||||
if len(ebpfResults) > 0 {
|
||||
allResults["ebpf_results"] = ebpfResults
|
||||
allResults["executed_ebpf_programs"] = len(ebpfResults)
|
||||
|
||||
// Extract evidence summary for TensorZero
|
||||
evidenceSummary := make([]string, 0)
|
||||
for _, result := range ebpfResults {
|
||||
name := result["name"]
|
||||
eventCount := result["data_points"]
|
||||
description := result["description"]
|
||||
status := result["status"]
|
||||
|
||||
summaryStr := fmt.Sprintf("%s: %v events (%s) - %s", name, eventCount, status, description)
|
||||
evidenceSummary = append(evidenceSummary, summaryStr)
|
||||
}
|
||||
allResults["ebpf_evidence_summary"] = evidenceSummary
|
||||
|
||||
fmt.Printf("<22> Sending eBPF monitoring data to TensorZero:\n")
|
||||
for _, summary := range evidenceSummary {
|
||||
fmt.Printf(" - %s\n", summary)
|
||||
}
|
||||
|
||||
fmt.Printf("✅ Executed %d commands, %d eBPF programs\n", len(commandResults), len(ebpfResults))
|
||||
} else {
|
||||
fmt.Printf("✅ Executed %d commands\n", len(commandResults))
|
||||
}
|
||||
|
||||
resultsJSON, err := json.MarshalIndent(allResults, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal command results: %w", err)
|
||||
}
|
||||
@@ -178,6 +217,127 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// executeEBPFPrograms executes REAL eBPF monitoring programs using the actual eBPF manager
|
||||
func (a *LinuxDiagnosticAgent) executeEBPFPrograms(ebpfPrograms []EBPFRequest) []map[string]interface{} {
|
||||
var results []map[string]interface{}
|
||||
|
||||
if a.ebpfManager == nil {
|
||||
fmt.Printf("❌ eBPF manager not initialized\n")
|
||||
return results
|
||||
}
|
||||
|
||||
for _, prog := range ebpfPrograms {
|
||||
fmt.Printf("🔬 Starting eBPF program [%s]: %s -> %s (%ds)\n", prog.Name, prog.Type, prog.Target, int(prog.Duration))
|
||||
|
||||
// Actually start the eBPF program using the real manager
|
||||
programID, err := a.ebpfManager.StartEBPFProgram(prog)
|
||||
if err != nil {
|
||||
fmt.Printf("❌ Failed to start eBPF program [%s]: %v\n", prog.Name, err)
|
||||
result := map[string]interface{}{
|
||||
"name": prog.Name,
|
||||
"type": prog.Type,
|
||||
"target": prog.Target,
|
||||
"duration": int(prog.Duration),
|
||||
"description": prog.Description,
|
||||
"status": "failed",
|
||||
"error": err.Error(),
|
||||
"success": false,
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
// Let the eBPF program run for the specified duration
|
||||
fmt.Printf("⏰ Waiting %d seconds for eBPF program to collect data...\n", int(prog.Duration))
|
||||
time.Sleep(time.Duration(prog.Duration) * time.Second)
|
||||
|
||||
// Give the collectEvents goroutine a moment to finish and store results
|
||||
fmt.Printf("⏳ Allowing program to complete data collection...\n")
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// Get the results (should be in completedResults now)
|
||||
fmt.Printf("📊 Getting results for eBPF program [%s]...\n", prog.Name)
|
||||
|
||||
// Use a channel to implement timeout for GetProgramResults
|
||||
type resultPair struct {
|
||||
trace *EBPFTrace
|
||||
err error
|
||||
}
|
||||
resultChan := make(chan resultPair, 1)
|
||||
|
||||
go func() {
|
||||
trace, err := a.ebpfManager.GetProgramResults(programID)
|
||||
resultChan <- resultPair{trace, err}
|
||||
}()
|
||||
|
||||
var trace *EBPFTrace
|
||||
var resultErr error
|
||||
|
||||
select {
|
||||
case result := <-resultChan:
|
||||
trace = result.trace
|
||||
resultErr = result.err
|
||||
case <-time.After(3 * time.Second):
|
||||
resultErr = fmt.Errorf("timeout getting results after 3 seconds")
|
||||
}
|
||||
|
||||
// Try to stop the program (may already be stopped by collectEvents)
|
||||
fmt.Printf("🛑 Stopping eBPF program [%s]...\n", prog.Name)
|
||||
stopErr := a.ebpfManager.StopProgram(programID)
|
||||
if stopErr != nil {
|
||||
fmt.Printf("⚠️ eBPF program [%s] cleanup: %v (may have already completed)\n", prog.Name, stopErr)
|
||||
// Don't return here, we still want to process results if we got them
|
||||
}
|
||||
|
||||
if resultErr != nil {
|
||||
fmt.Printf("❌ Failed to get results for eBPF program [%s]: %v\n", prog.Name, resultErr)
|
||||
result := map[string]interface{}{
|
||||
"name": prog.Name,
|
||||
"type": prog.Type,
|
||||
"target": prog.Target,
|
||||
"duration": int(prog.Duration),
|
||||
"description": prog.Description,
|
||||
"status": "collection_failed",
|
||||
"error": resultErr.Error(),
|
||||
"success": false,
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
} // Process the real eBPF trace data
|
||||
result := map[string]interface{}{
|
||||
"name": prog.Name,
|
||||
"type": prog.Type,
|
||||
"target": prog.Target,
|
||||
"duration": int(prog.Duration),
|
||||
"description": prog.Description,
|
||||
"status": "completed",
|
||||
"success": true,
|
||||
}
|
||||
|
||||
// Extract real data from the trace
|
||||
if trace != nil {
|
||||
result["trace_id"] = trace.TraceID
|
||||
result["data_points"] = trace.EventCount
|
||||
result["events"] = trace.Events
|
||||
result["summary"] = trace.Summary
|
||||
result["process_list"] = trace.ProcessList
|
||||
result["start_time"] = trace.StartTime.Format(time.RFC3339)
|
||||
result["end_time"] = trace.EndTime.Format(time.RFC3339)
|
||||
result["actual_duration"] = trace.EndTime.Sub(trace.StartTime).Seconds()
|
||||
|
||||
fmt.Printf("✅ eBPF program [%s] completed - collected %d real events\n", prog.Name, trace.EventCount)
|
||||
} else {
|
||||
result["data_points"] = 0
|
||||
result["error"] = "No trace data returned"
|
||||
fmt.Printf("⚠️ eBPF program [%s] completed but returned no trace data\n", prog.Name)
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
// TensorZeroRequest represents a request structure compatible with TensorZero's episode_id
|
||||
type TensorZeroRequest struct {
|
||||
Model string `json:"model"`
|
||||
@@ -193,6 +353,11 @@ type TensorZeroResponse struct {
|
||||
|
||||
// sendRequest sends a request to the TensorZero API via Supabase proxy with JWT authentication
|
||||
func (a *LinuxDiagnosticAgent) sendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
|
||||
return a.sendRequestWithEpisode(messages, "")
|
||||
}
|
||||
|
||||
// sendRequestWithEpisode sends a request with a specific episode ID
|
||||
func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
@@ -202,9 +367,12 @@ func (a *LinuxDiagnosticAgent) sendRequest(messages []openai.ChatCompletionMessa
|
||||
Messages: messages,
|
||||
}
|
||||
|
||||
// Include tensorzero::episode_id for conversation continuity (if we have one)
|
||||
// Include tensorzero::episode_id for conversation continuity
|
||||
// Use agent's existing episode ID if available, otherwise use provided one
|
||||
if a.episodeID != "" {
|
||||
tzRequest.EpisodeID = a.episodeID
|
||||
} else if episodeID != "" {
|
||||
tzRequest.EpisodeID = episodeID
|
||||
}
|
||||
|
||||
fmt.Printf("Debug: Sending request to model: %s", a.model)
|
||||
|
||||
Reference in New Issue
Block a user