Remove old eBPF implementations - keep only new BCC-style concurrent tracing
This commit is contained in:
526
agent.go
526
agent.go
@@ -2,12 +2,13 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/sashabaranov/go-openai"
|
||||
@@ -15,9 +16,35 @@ import (
|
||||
|
||||
// DiagnosticResponse represents the diagnostic phase response from AI.
//
// Commands holds raw shell command strings; DiagnoseIssue wraps each one in a
// Command value (with a generated ID and description) before executing it.
type DiagnosticResponse struct {
	ResponseType    string   `json:"response_type"`    // discriminator for the kind of response
	Phase           string   `json:"phase"`            // phase label reported by the AI
	Analysis        string   `json:"analysis"`         // free-form analysis text
	Commands        []string `json:"commands"`         // shell commands to run, in order
	NextSteps       []string `json:"next_steps"`       // follow-up steps suggested by the AI
	Reasoning       string   `json:"reasoning"`        // why these commands were chosen
	ConfidenceLevel float64  `json:"confidence_level"` // confidence value as reported by the AI
}
|
||||
|
||||
// EBPFRequest represents a request for eBPF program execution.
type EBPFRequest struct {
	Name        string            `json:"name"`              // label used in log lines and result maps
	Type        string            `json:"type"`              // program kind, e.g. "tracepoint" or "syscall"
	Target      string            `json:"target"`            // probe target; may carry a "tracepoint:" or "kprobe:" prefix
	Duration    int               `json:"duration"`          // how long to trace, in seconds
	Filters     map[string]string `json:"filters,omitempty"` // optional filters; omitted from JSON when empty
	Description string            `json:"description"`       // human-readable purpose of the program
}
|
||||
|
||||
// EBPFEnhancedDiagnosticResponse represents the enhanced diagnostic response with eBPF
|
||||
type EBPFEnhancedDiagnosticResponse struct {
|
||||
ResponseType string `json:"response_type"`
|
||||
Phase string `json:"phase"`
|
||||
Analysis string `json:"analysis"`
|
||||
Commands []string `json:"commands"`
|
||||
EBPFPrograms []EBPFRequest `json:"ebpf_programs"`
|
||||
NextSteps []string `json:"next_steps"`
|
||||
Reasoning string `json:"reasoning"`
|
||||
ConfidenceLevel float64 `json:"confidence_level"`
|
||||
}
|
||||
|
||||
// ResolutionResponse represents the resolution phase response from AI
|
||||
@@ -35,6 +62,20 @@ type Command struct {
|
||||
Description string `json:"description"`
|
||||
}
|
||||
|
||||
// AgentConfig holds configuration for concurrent execution.
type AgentConfig struct {
	// MaxConcurrentTasks bounds how many BCC traces may run at the same time.
	MaxConcurrentTasks int `json:"max_concurrent_tasks"`
	// CollectiveResults indicates that results are reported together once
	// every trace has finished.
	CollectiveResults bool `json:"collective_results"`
}
|
||||
|
||||
// DefaultAgentConfig returns default configuration
|
||||
func DefaultAgentConfig() *AgentConfig {
|
||||
return &AgentConfig{
|
||||
MaxConcurrentTasks: 10, // Default to 10 concurrent forks
|
||||
CollectiveResults: true, // Send results collectively when all finish
|
||||
}
|
||||
}
|
||||
|
||||
// CommandResult represents the result of executing a command
|
||||
type CommandResult struct {
|
||||
ID string `json:"id"`
|
||||
@@ -49,8 +90,9 @@ type LinuxDiagnosticAgent struct {
|
||||
client *openai.Client
|
||||
model string
|
||||
executor *CommandExecutor
|
||||
episodeID string // TensorZero episode ID for conversation continuity
|
||||
ebpfManager EBPFManagerInterface // eBPF monitoring capabilities
|
||||
episodeID string // TensorZero episode ID for conversation continuity
|
||||
ebpfManager *BCCTraceManager // BCC-style eBPF tracing capabilities
|
||||
config *AgentConfig // Configuration for concurrent execution
|
||||
}
|
||||
|
||||
// NewLinuxDiagnosticAgent creates a new diagnostic agent
|
||||
@@ -73,10 +115,11 @@ func NewLinuxDiagnosticAgent() *LinuxDiagnosticAgent {
|
||||
client: nil, // Not used anymore
|
||||
model: model,
|
||||
executor: NewCommandExecutor(10 * time.Second), // 10 second timeout for commands
|
||||
config: DefaultAgentConfig(), // Default concurrent execution config
|
||||
}
|
||||
|
||||
// Initialize eBPF capabilities
|
||||
agent.ebpfManager = NewCiliumEBPFManager()
|
||||
// Initialize BCC-style eBPF capabilities
|
||||
agent.ebpfManager = NewBCCTraceManager()
|
||||
|
||||
return agent
|
||||
}
|
||||
@@ -127,7 +170,13 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
commandResults := make([]CommandResult, 0, len(diagnosticResp.Commands))
|
||||
if len(diagnosticResp.Commands) > 0 {
|
||||
fmt.Printf("🔧 Executing diagnostic commands...\n")
|
||||
for _, cmd := range diagnosticResp.Commands {
|
||||
for i, cmdStr := range diagnosticResp.Commands {
|
||||
// Convert string to Command struct
|
||||
cmd := Command{
|
||||
ID: fmt.Sprintf("cmd_%d", i),
|
||||
Command: cmdStr,
|
||||
Description: fmt.Sprintf("Diagnostic command: %s", cmdStr),
|
||||
}
|
||||
result := a.executor.Execute(cmd)
|
||||
commandResults = append(commandResults, result)
|
||||
|
||||
@@ -137,10 +186,14 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Execute eBPF programs if present
|
||||
// Execute eBPF programs if present - support both old and new formats
|
||||
var ebpfResults []map[string]interface{}
|
||||
if len(diagnosticResp.EBPFPrograms) > 0 {
|
||||
ebpfResults = a.executeEBPFPrograms(diagnosticResp.EBPFPrograms)
|
||||
fmt.Printf("🔬 AI requested %d eBPF traces for enhanced diagnostics\n", len(diagnosticResp.EBPFPrograms))
|
||||
|
||||
// Convert EBPFPrograms to TraceSpecs and execute concurrently
|
||||
traceSpecs := a.convertEBPFProgramsToTraceSpecs(diagnosticResp.EBPFPrograms)
|
||||
ebpfResults = a.executeBCCTracesConcurrently(traceSpecs)
|
||||
}
|
||||
|
||||
// Prepare combined results as user message
|
||||
@@ -204,193 +257,59 @@ func (a *LinuxDiagnosticAgent) DiagnoseIssue(issue string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// executeEBPFPrograms executes REAL eBPF monitoring programs using the actual eBPF manager
|
||||
func (a *LinuxDiagnosticAgent) executeEBPFPrograms(ebpfPrograms []EBPFRequest) []map[string]interface{} {
|
||||
var results []map[string]interface{}
|
||||
|
||||
if a.ebpfManager == nil {
|
||||
fmt.Printf("❌ eBPF manager not initialized\n")
|
||||
return results
|
||||
}
|
||||
|
||||
for _, prog := range ebpfPrograms {
|
||||
// eBPF program starting - only show in debug mode
|
||||
|
||||
// Actually start the eBPF program using the real manager
|
||||
programID, err := a.ebpfManager.StartEBPFProgram(prog)
|
||||
if err != nil {
|
||||
fmt.Printf("❌ Failed to start eBPF program [%s]: %v\n", prog.Name, err)
|
||||
result := map[string]interface{}{
|
||||
"name": prog.Name,
|
||||
"type": prog.Type,
|
||||
"target": prog.Target,
|
||||
"duration": int(prog.Duration),
|
||||
"description": prog.Description,
|
||||
"status": "failed",
|
||||
"error": err.Error(),
|
||||
"success": false,
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
// Let the eBPF program run for the specified duration
|
||||
time.Sleep(time.Duration(prog.Duration) * time.Second)
|
||||
|
||||
// Give the collectEvents goroutine a moment to finish and store results
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// Use a channel to implement timeout for GetProgramResults
|
||||
type resultPair struct {
|
||||
trace *EBPFTrace
|
||||
err error
|
||||
}
|
||||
resultChan := make(chan resultPair, 1)
|
||||
|
||||
go func() {
|
||||
trace, err := a.ebpfManager.GetProgramResults(programID)
|
||||
resultChan <- resultPair{trace, err}
|
||||
}()
|
||||
|
||||
var trace *EBPFTrace
|
||||
var resultErr error
|
||||
|
||||
select {
|
||||
case result := <-resultChan:
|
||||
trace = result.trace
|
||||
resultErr = result.err
|
||||
case <-time.After(3 * time.Second):
|
||||
resultErr = fmt.Errorf("timeout getting results after 3 seconds")
|
||||
}
|
||||
|
||||
// Try to stop the program (may already be stopped by collectEvents)
|
||||
stopErr := a.ebpfManager.StopProgram(programID)
|
||||
if stopErr != nil {
|
||||
// Only show warning in debug mode - this is normal for completed programs
|
||||
}
|
||||
|
||||
if resultErr != nil {
|
||||
fmt.Printf("❌ Failed to get results for eBPF program [%s]: %v\n", prog.Name, resultErr)
|
||||
result := map[string]interface{}{
|
||||
"name": prog.Name,
|
||||
"type": prog.Type,
|
||||
"target": prog.Target,
|
||||
"duration": int(prog.Duration),
|
||||
"description": prog.Description,
|
||||
"status": "collection_failed",
|
||||
"error": resultErr.Error(),
|
||||
"success": false,
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
} // Process the real eBPF trace data
|
||||
result := map[string]interface{}{
|
||||
"name": prog.Name,
|
||||
"type": prog.Type,
|
||||
"target": prog.Target,
|
||||
"duration": int(prog.Duration),
|
||||
"description": prog.Description,
|
||||
"status": "completed",
|
||||
"success": true,
|
||||
}
|
||||
|
||||
// Extract real data from the trace
|
||||
if trace != nil {
|
||||
result["trace_id"] = trace.TraceID
|
||||
result["data_points"] = trace.EventCount
|
||||
result["events"] = trace.Events
|
||||
result["summary"] = trace.Summary
|
||||
result["process_list"] = trace.ProcessList
|
||||
result["start_time"] = trace.StartTime.Format(time.RFC3339)
|
||||
result["end_time"] = trace.EndTime.Format(time.RFC3339)
|
||||
result["actual_duration"] = trace.EndTime.Sub(trace.StartTime).Seconds()
|
||||
|
||||
} else {
|
||||
result["data_points"] = 0
|
||||
result["error"] = "No trace data returned"
|
||||
fmt.Printf("⚠️ eBPF program [%s] completed but returned no trace data\n", prog.Name)
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
// TensorZeroRequest represents a request structure compatible with TensorZero's episode_id
|
||||
type TensorZeroRequest struct {
|
||||
Model string `json:"model"`
|
||||
Messages []openai.ChatCompletionMessage `json:"messages"`
|
||||
EpisodeID string `json:"tensorzero::episode_id,omitempty"`
|
||||
}
|
||||
|
||||
// TensorZeroResponse represents TensorZero's response with episode_id
|
||||
type TensorZeroResponse struct {
|
||||
openai.ChatCompletionResponse
|
||||
EpisodeID string `json:"episode_id"`
|
||||
}
|
||||
|
||||
// sendRequest sends a request to the TensorZero API via Supabase proxy with JWT authentication
|
||||
// sendRequest sends a request to TensorZero via Supabase proxy (without episode ID)
|
||||
func (a *LinuxDiagnosticAgent) sendRequest(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionResponse, error) {
|
||||
return a.sendRequestWithEpisode(messages, "")
|
||||
}
|
||||
|
||||
// sendRequestWithEpisode sends a request with a specific episode ID
|
||||
// sendRequestWithEpisode sends a request to TensorZero via Supabase proxy with episode ID for conversation continuity
|
||||
func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatCompletionMessage, episodeID string) (*openai.ChatCompletionResponse, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Create TensorZero-compatible request
|
||||
tzRequest := TensorZeroRequest{
|
||||
Model: a.model,
|
||||
Messages: messages,
|
||||
// Convert messages to the expected format
|
||||
messageMaps := make([]map[string]interface{}, len(messages))
|
||||
for i, msg := range messages {
|
||||
messageMaps[i] = map[string]interface{}{
|
||||
"role": msg.Role,
|
||||
"content": msg.Content,
|
||||
}
|
||||
}
|
||||
|
||||
// Include tensorzero::episode_id for conversation continuity
|
||||
// Use agent's existing episode ID if available, otherwise use provided one
|
||||
if a.episodeID != "" {
|
||||
tzRequest.EpisodeID = a.episodeID
|
||||
} else if episodeID != "" {
|
||||
tzRequest.EpisodeID = episodeID
|
||||
// Create TensorZero request
|
||||
tzRequest := map[string]interface{}{
|
||||
"model": a.model,
|
||||
"messages": messageMaps,
|
||||
}
|
||||
|
||||
fmt.Printf("Debug: Sending request to model: %s", a.model)
|
||||
if a.episodeID != "" {
|
||||
fmt.Printf(" (episode: %s)", a.episodeID)
|
||||
// Add episode ID if provided
|
||||
if episodeID != "" {
|
||||
tzRequest["tensorzero::episode_id"] = episodeID
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// Marshal the request
|
||||
// Marshal request
|
||||
requestBody, err := json.Marshal(tzRequest)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
// Get Supabase project URL and build TensorZero proxy endpoint
|
||||
// Get Supabase URL
|
||||
supabaseURL := os.Getenv("SUPABASE_PROJECT_URL")
|
||||
if supabaseURL == "" {
|
||||
supabaseURL = "https://gpqzsricripnvbrpsyws.supabase.co"
|
||||
return nil, fmt.Errorf("SUPABASE_PROJECT_URL not set")
|
||||
}
|
||||
|
||||
// Build Supabase function URL with OpenAI v1 compatible path
|
||||
endpoint := supabaseURL + "/functions/v1/tensorzero-proxy/openai/v1/chat/completions"
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewBuffer(requestBody))
|
||||
// Create HTTP request to TensorZero proxy
|
||||
endpoint := fmt.Sprintf("%s/functions/v1/tensorzero-proxy", supabaseURL)
|
||||
req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(requestBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
// Set headers
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
// Add JWT authentication header
|
||||
accessToken, err := a.getAccessToken()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get access token: %w", err)
|
||||
}
|
||||
// Note: No authentication needed for TensorZero proxy based on the existing pattern
|
||||
|
||||
req.Header.Set("Authorization", "Bearer "+accessToken)
|
||||
|
||||
// Make the request
|
||||
// Send request
|
||||
client := &http.Client{Timeout: 30 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
@@ -398,55 +317,242 @@ func (a *LinuxDiagnosticAgent) sendRequestWithEpisode(messages []openai.ChatComp
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Read response body
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
// Check status code
|
||||
if resp.StatusCode != 200 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("TensorZero proxy error: %d, body: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("TensorZero API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||
// Parse response
|
||||
var tzResponse map[string]interface{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&tzResponse); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
|
||||
// Parse TensorZero response
|
||||
var tzResponse TensorZeroResponse
|
||||
if err := json.Unmarshal(body, &tzResponse); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal response: %w", err)
|
||||
// Convert to OpenAI format for compatibility
|
||||
choices, ok := tzResponse["choices"].([]interface{})
|
||||
if !ok || len(choices) == 0 {
|
||||
return nil, fmt.Errorf("no choices in response")
|
||||
}
|
||||
|
||||
// Extract episode_id from first response
|
||||
if a.episodeID == "" && tzResponse.EpisodeID != "" {
|
||||
a.episodeID = tzResponse.EpisodeID
|
||||
fmt.Printf("Debug: Extracted episode ID: %s\n", a.episodeID)
|
||||
// Extract the first choice
|
||||
firstChoice, ok := choices[0].(map[string]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid choice format")
|
||||
}
|
||||
|
||||
return &tzResponse.ChatCompletionResponse, nil
|
||||
message, ok := firstChoice["message"].(map[string]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
}
|
||||
|
||||
content, ok := message["content"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid content format")
|
||||
}
|
||||
|
||||
// Create OpenAI-compatible response
|
||||
response := &openai.ChatCompletionResponse{
|
||||
Choices: []openai.ChatCompletionChoice{
|
||||
{
|
||||
Message: openai.ChatCompletionMessage{
|
||||
Role: openai.ChatMessageRoleAssistant,
|
||||
Content: content,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Update episode ID if provided in response
|
||||
if respEpisodeID, ok := tzResponse["episode_id"].(string); ok && respEpisodeID != "" {
|
||||
a.episodeID = respEpisodeID
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// getAccessToken retrieves the current access token for authentication
|
||||
func (a *LinuxDiagnosticAgent) getAccessToken() (string, error) {
|
||||
// Read token from the standard token file location
|
||||
tokenPath := os.Getenv("TOKEN_PATH")
|
||||
if tokenPath == "" {
|
||||
tokenPath = "/var/lib/nannyagent/token.json"
|
||||
// convertEBPFProgramsToTraceSpecs converts old EBPFProgram format to new TraceSpec format
|
||||
func (a *LinuxDiagnosticAgent) convertEBPFProgramsToTraceSpecs(ebpfPrograms []EBPFRequest) []TraceSpec {
|
||||
var traceSpecs []TraceSpec
|
||||
|
||||
for _, prog := range ebpfPrograms {
|
||||
spec := a.convertToTraceSpec(prog)
|
||||
traceSpecs = append(traceSpecs, spec)
|
||||
}
|
||||
|
||||
tokenData, err := os.ReadFile(tokenPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read token file: %w", err)
|
||||
}
|
||||
|
||||
var tokenInfo struct {
|
||||
AccessToken string `json:"access_token"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(tokenData, &tokenInfo); err != nil {
|
||||
return "", fmt.Errorf("failed to parse token file: %w", err)
|
||||
}
|
||||
|
||||
if tokenInfo.AccessToken == "" {
|
||||
return "", fmt.Errorf("access token is empty")
|
||||
}
|
||||
|
||||
return tokenInfo.AccessToken, nil
|
||||
return traceSpecs
|
||||
}
|
||||
|
||||
// convertToTraceSpec converts an EBPFRequest to a TraceSpec for BCC-style tracing
|
||||
func (a *LinuxDiagnosticAgent) convertToTraceSpec(prog EBPFRequest) TraceSpec {
|
||||
// Determine probe type based on target and type
|
||||
probeType := "p" // default to kprobe
|
||||
target := prog.Target
|
||||
|
||||
if strings.HasPrefix(target, "tracepoint:") {
|
||||
probeType = "t"
|
||||
target = strings.TrimPrefix(target, "tracepoint:")
|
||||
} else if strings.HasPrefix(target, "kprobe:") {
|
||||
probeType = "p"
|
||||
target = strings.TrimPrefix(target, "kprobe:")
|
||||
} else if prog.Type == "tracepoint" {
|
||||
probeType = "t"
|
||||
} else if prog.Type == "syscall" {
|
||||
// Convert syscall names to kprobe targets
|
||||
if !strings.HasPrefix(target, "__x64_sys_") && !strings.Contains(target, ":") {
|
||||
if strings.HasPrefix(target, "sys_") {
|
||||
target = "__x64_" + target
|
||||
} else {
|
||||
target = "__x64_sys_" + target
|
||||
}
|
||||
}
|
||||
probeType = "p"
|
||||
}
|
||||
|
||||
// Set default duration if not specified
|
||||
duration := prog.Duration
|
||||
if duration <= 0 {
|
||||
duration = 5 // default 5 seconds
|
||||
}
|
||||
|
||||
return TraceSpec{
|
||||
ProbeType: probeType,
|
||||
Target: target,
|
||||
Format: prog.Description, // Use description as format
|
||||
Arguments: []string{}, // Start with no arguments for compatibility
|
||||
Duration: duration,
|
||||
}
|
||||
}
|
||||
|
||||
// executeBCCTracesConcurrently executes multiple BCC traces concurrently with configurable parallelism
|
||||
func (a *LinuxDiagnosticAgent) executeBCCTracesConcurrently(traceSpecs []TraceSpec) []map[string]interface{} {
|
||||
if len(traceSpecs) == 0 {
|
||||
return []map[string]interface{}{}
|
||||
}
|
||||
|
||||
fmt.Printf("🚀 Executing %d BCC traces with max %d concurrent tasks\n", len(traceSpecs), a.config.MaxConcurrentTasks)
|
||||
|
||||
// Channel to limit concurrent goroutines
|
||||
semaphore := make(chan struct{}, a.config.MaxConcurrentTasks)
|
||||
resultsChan := make(chan map[string]interface{}, len(traceSpecs))
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Start all traces concurrently
|
||||
for i, spec := range traceSpecs {
|
||||
wg.Add(1)
|
||||
go func(index int, traceSpec TraceSpec) {
|
||||
defer wg.Done()
|
||||
|
||||
// Acquire semaphore
|
||||
semaphore <- struct{}{}
|
||||
defer func() { <-semaphore }()
|
||||
|
||||
result := a.executeSingleBCCTrace(index, traceSpec)
|
||||
resultsChan <- result
|
||||
}(i, spec)
|
||||
}
|
||||
|
||||
// Wait for all traces to complete
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(resultsChan)
|
||||
}()
|
||||
|
||||
// Collect all results
|
||||
var allResults []map[string]interface{}
|
||||
for result := range resultsChan {
|
||||
allResults = append(allResults, result)
|
||||
}
|
||||
|
||||
if a.config.CollectiveResults {
|
||||
fmt.Printf("✅ All %d BCC traces completed. Sending collective results to API layer.\n", len(allResults))
|
||||
}
|
||||
|
||||
return allResults
|
||||
}
|
||||
|
||||
// executeSingleBCCTrace executes a single BCC trace and returns the result
|
||||
func (a *LinuxDiagnosticAgent) executeSingleBCCTrace(index int, spec TraceSpec) map[string]interface{} {
|
||||
result := map[string]interface{}{
|
||||
"index": index,
|
||||
"target": spec.Target,
|
||||
"probe_type": spec.ProbeType,
|
||||
"success": false,
|
||||
"error": "",
|
||||
"start_time": time.Now().Format(time.RFC3339),
|
||||
}
|
||||
|
||||
fmt.Printf("🔍 [Task %d] Starting BCC trace: %s (type: %s)\n", index, spec.Target, spec.ProbeType)
|
||||
|
||||
// Start the trace
|
||||
traceID, err := a.ebpfManager.StartTrace(spec)
|
||||
if err != nil {
|
||||
result["error"] = fmt.Sprintf("Failed to start trace: %v", err)
|
||||
fmt.Printf("❌ [Task %d] Failed to start trace %s: %v\n", index, spec.Target, err)
|
||||
return result
|
||||
}
|
||||
|
||||
result["trace_id"] = traceID
|
||||
fmt.Printf("🚀 [Task %d] Trace %s started with ID: %s\n", index, spec.Target, traceID)
|
||||
|
||||
// Wait for the trace duration
|
||||
time.Sleep(time.Duration(spec.Duration) * time.Second)
|
||||
|
||||
// Get the trace result
|
||||
traceResult, err := a.ebpfManager.GetTraceResult(traceID)
|
||||
if err != nil {
|
||||
// Try to stop the trace if it's still running
|
||||
a.ebpfManager.StopTrace(traceID)
|
||||
result["error"] = fmt.Sprintf("Failed to get trace results: %v", err)
|
||||
fmt.Printf("❌ [Task %d] Failed to get results for trace %s: %v\n", index, spec.Target, err)
|
||||
return result
|
||||
}
|
||||
|
||||
// Populate result with trace data
|
||||
result["success"] = true
|
||||
result["end_time"] = time.Now().Format(time.RFC3339)
|
||||
result["event_count"] = traceResult.EventCount
|
||||
result["events_per_second"] = traceResult.Statistics.EventsPerSecond
|
||||
result["duration"] = traceResult.EndTime.Sub(traceResult.StartTime).Seconds()
|
||||
result["summary"] = traceResult.Summary
|
||||
|
||||
// Include sample events (limit to avoid large payloads)
|
||||
maxSampleEvents := 10
|
||||
if len(traceResult.Events) > 0 {
|
||||
sampleCount := len(traceResult.Events)
|
||||
if sampleCount > maxSampleEvents {
|
||||
sampleCount = maxSampleEvents
|
||||
}
|
||||
|
||||
sampleEvents := make([]map[string]interface{}, sampleCount)
|
||||
for i := 0; i < sampleCount; i++ {
|
||||
event := traceResult.Events[i]
|
||||
sampleEvents[i] = map[string]interface{}{
|
||||
"pid": event.PID,
|
||||
"tid": event.TID,
|
||||
"process_name": event.ProcessName,
|
||||
"message": event.Message,
|
||||
"timestamp": event.Timestamp,
|
||||
}
|
||||
}
|
||||
result["sample_events"] = sampleEvents
|
||||
}
|
||||
|
||||
// Include top processes
|
||||
if len(traceResult.Statistics.TopProcesses) > 0 {
|
||||
topProcesses := make([]map[string]interface{}, len(traceResult.Statistics.TopProcesses))
|
||||
for i, proc := range traceResult.Statistics.TopProcesses {
|
||||
topProcesses[i] = map[string]interface{}{
|
||||
"process_name": proc.ProcessName,
|
||||
"event_count": proc.EventCount,
|
||||
"percentage": proc.Percentage,
|
||||
}
|
||||
}
|
||||
result["top_processes"] = topProcesses
|
||||
}
|
||||
|
||||
fmt.Printf("✅ [Task %d] Trace %s completed: %d events (%.2f events/sec)\n",
|
||||
index, spec.Target, traceResult.EventCount, traceResult.Statistics.EventsPerSecond)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user