Somewhat working eBPF/bpftrace support
This commit is contained in:
@@ -13,7 +13,9 @@ import (
|
||||
"time"
|
||||
|
||||
"nannyagentv2/internal/auth"
|
||||
"nannyagentv2/internal/logging"
|
||||
"nannyagentv2/internal/metrics"
|
||||
"nannyagentv2/internal/types"
|
||||
|
||||
"github.com/gorilla/websocket"
|
||||
"github.com/sashabaranov/go-openai"
|
||||
@@ -74,7 +76,7 @@ func NewWebSocketClient(agent *LinuxDiagnosticAgent, authManager *auth.AuthManag
|
||||
agentID = id
|
||||
// Agent ID retrieved successfully
|
||||
} else {
|
||||
fmt.Printf("❌ Failed to get agent ID from auth manager: %v\n", err)
|
||||
logging.Error("Failed to get agent ID from auth manager: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,7 +180,7 @@ func (c *WebSocketClient) connect() error {
|
||||
if err != nil {
|
||||
c.consecutiveFailures++
|
||||
if c.consecutiveFailures >= 5 && resp != nil {
|
||||
fmt.Printf("❌ WebSocket handshake failed with status: %d (failure #%d)\n", resp.StatusCode, c.consecutiveFailures)
|
||||
logging.Error("WebSocket handshake failed with status: %d (failure #%d)", resp.StatusCode, c.consecutiveFailures)
|
||||
}
|
||||
return fmt.Errorf("websocket connection failed: %v", err)
|
||||
}
|
||||
@@ -205,7 +207,7 @@ func (c *WebSocketClient) handleMessages() {
|
||||
case <-c.ctx.Done():
|
||||
// Only log context cancellation if there have been failures
|
||||
if c.consecutiveFailures >= 5 {
|
||||
fmt.Printf("📡 Context cancelled after %v, stopping message handler\n", time.Since(connectionStart))
|
||||
logging.Debug("Context cancelled after %v, stopping message handler", time.Since(connectionStart))
|
||||
}
|
||||
return
|
||||
default:
|
||||
@@ -223,14 +225,14 @@ func (c *WebSocketClient) handleMessages() {
|
||||
// Only log specific errors after failure threshold
|
||||
if c.consecutiveFailures >= 5 {
|
||||
if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
|
||||
log.Printf("🔒 WebSocket closed normally after %v: %v", connectionDuration, err)
|
||||
logging.Debug("WebSocket closed normally after %v: %v", connectionDuration, err)
|
||||
} else if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
|
||||
log.Printf("💥 ABNORMAL CLOSE after %v (code 1006 = server-side timeout/kill): %v", connectionDuration, err)
|
||||
log.Printf("🕒 Last read took %v, connection lived %v", readDuration, connectionDuration)
|
||||
logging.Error("ABNORMAL CLOSE after %v (code 1006 = server-side timeout/kill): %v", connectionDuration, err)
|
||||
logging.Debug("Last read took %v, connection lived %v", readDuration, connectionDuration)
|
||||
} else if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
|
||||
log.Printf("⏰ READ TIMEOUT after %v: %v", connectionDuration, err)
|
||||
logging.Warning("READ TIMEOUT after %v: %v", connectionDuration, err)
|
||||
} else {
|
||||
log.Printf("❌ WebSocket error after %v: %v", connectionDuration, err)
|
||||
logging.Error("WebSocket error after %v: %v", connectionDuration, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,7 +241,7 @@ func (c *WebSocketClient) handleMessages() {
|
||||
|
||||
// Only show diagnostics after multiple failures
|
||||
if c.consecutiveFailures >= 5 {
|
||||
log.Printf("🔍 DIAGNOSTIC - Connection failed #%d after %v", c.consecutiveFailures, connectionDuration)
|
||||
logging.Debug("DIAGNOSTIC - Connection failed #%d after %v", c.consecutiveFailures, connectionDuration)
|
||||
}
|
||||
|
||||
// Attempt reconnection instead of returning immediately
|
||||
@@ -265,7 +267,7 @@ func (c *WebSocketClient) handleMessages() {
|
||||
// Task result acknowledged
|
||||
|
||||
default:
|
||||
log.Printf("⚠️ Unknown message type: %s", message.Type)
|
||||
logging.Warning("Unknown message type: %s", message.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -276,14 +278,14 @@ func (c *WebSocketClient) handleInvestigationTask(data interface{}) {
|
||||
// Parse task data
|
||||
taskBytes, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
log.Printf("❌ Error marshaling task data: %v", err)
|
||||
logging.Error("Error marshaling task data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
var task InvestigationTask
|
||||
err = json.Unmarshal(taskBytes, &task)
|
||||
if err != nil {
|
||||
log.Printf("❌ Error unmarshaling investigation task: %v", err)
|
||||
logging.Error("Error unmarshaling investigation task: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -300,7 +302,7 @@ func (c *WebSocketClient) handleInvestigationTask(data interface{}) {
|
||||
|
||||
if err != nil {
|
||||
taskResult.Error = err.Error()
|
||||
fmt.Printf("❌ Task execution failed: %v\n", err)
|
||||
logging.Error("Task execution failed: %v", err)
|
||||
} else {
|
||||
taskResult.CommandResults = results
|
||||
// Task executed successfully
|
||||
@@ -356,7 +358,7 @@ func (c *WebSocketClient) executeDiagnosticCommands(diagnosticPayload map[string
|
||||
|
||||
if err != nil {
|
||||
result["error"] = err.Error()
|
||||
fmt.Printf("❌ Command [%s] failed: %v (exit code: %d)\n", id, err, exitCode)
|
||||
logging.Warning("Command [%s] failed: %v (exit code: %d)", id, err, exitCode)
|
||||
}
|
||||
|
||||
commandResults = append(commandResults, result)
|
||||
@@ -379,7 +381,7 @@ func (c *WebSocketClient) executeDiagnosticCommands(diagnosticPayload map[string
|
||||
|
||||
// executeEBPFPrograms executes eBPF monitoring programs using the real eBPF manager
|
||||
func (c *WebSocketClient) executeEBPFPrograms(ebpfPrograms []interface{}) []map[string]interface{} {
|
||||
var ebpfRequests []EBPFRequest
|
||||
var ebpfRequests []types.EBPFRequest
|
||||
|
||||
// Convert interface{} to EBPFRequest structs
|
||||
for _, prog := range ebpfPrograms {
|
||||
@@ -398,7 +400,7 @@ func (c *WebSocketClient) executeEBPFPrograms(ebpfPrograms []interface{}) []map[
|
||||
continue
|
||||
}
|
||||
|
||||
ebpfRequests = append(ebpfRequests, EBPFRequest{
|
||||
ebpfRequests = append(ebpfRequests, types.EBPFRequest{
|
||||
Name: name,
|
||||
Type: progType,
|
||||
Target: target,
|
||||
@@ -444,7 +446,7 @@ func (c *WebSocketClient) executeCommandsFromPayload(commands []interface{}) []m
|
||||
|
||||
if err != nil {
|
||||
result["error"] = err.Error()
|
||||
fmt.Printf("❌ Command [%s] failed: %v (exit code: %d)\n", id, err, exitCode)
|
||||
logging.Warning("Command [%s] failed: %v (exit code: %d)", id, err, exitCode)
|
||||
}
|
||||
|
||||
commandResults = append(commandResults, result)
|
||||
@@ -502,7 +504,7 @@ func (c *WebSocketClient) sendTaskResult(result TaskResult) {
|
||||
|
||||
err := c.conn.WriteJSON(message)
|
||||
if err != nil {
|
||||
log.Printf("❌ Error sending task result: %v", err)
|
||||
logging.Error("Error sending task result: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -516,7 +518,7 @@ func (c *WebSocketClient) startHeartbeat() {
|
||||
for {
|
||||
select {
|
||||
case <-c.ctx.Done():
|
||||
fmt.Printf("💓 Heartbeat stopped due to context cancellation\n")
|
||||
logging.Debug("Heartbeat stopped due to context cancellation")
|
||||
return
|
||||
case <-ticker.C:
|
||||
// Sending heartbeat
|
||||
@@ -531,8 +533,8 @@ func (c *WebSocketClient) startHeartbeat() {
|
||||
|
||||
err := c.conn.WriteJSON(heartbeat)
|
||||
if err != nil {
|
||||
log.Printf("❌ Error sending heartbeat: %v", err)
|
||||
fmt.Printf("💓 Heartbeat failed, connection likely dead\n")
|
||||
logging.Error("Error sending heartbeat: %v", err)
|
||||
logging.Debug("Heartbeat failed, connection likely dead")
|
||||
return
|
||||
}
|
||||
// Heartbeat sent
|
||||
@@ -656,14 +658,14 @@ func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvest
|
||||
for {
|
||||
tzResp, tzErr := c.agent.sendRequestWithEpisode(messages, episodeID)
|
||||
if tzErr != nil {
|
||||
fmt.Printf("⚠️ TensorZero continuation failed: %v\n", tzErr)
|
||||
logging.Warning("TensorZero continuation failed: %v", tzErr)
|
||||
// Fall back to marking completed with command results only
|
||||
c.updateInvestigationStatus(investigation.ID, "completed", resultsForDB, nil)
|
||||
return
|
||||
}
|
||||
|
||||
if len(tzResp.Choices) == 0 {
|
||||
fmt.Printf("⚠️ No choices in TensorZero response\n")
|
||||
logging.Warning("No choices in TensorZero response")
|
||||
c.updateInvestigationStatus(investigation.ID, "completed", resultsForDB, nil)
|
||||
return
|
||||
}
|
||||
@@ -672,7 +674,7 @@ func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvest
|
||||
if len(aiContent) > 300 {
|
||||
// AI response received successfully
|
||||
} else {
|
||||
fmt.Printf("🤖 AI Response: %s\n", aiContent)
|
||||
logging.Debug("AI Response: %s", aiContent)
|
||||
}
|
||||
|
||||
// Check if this is a resolution response (final)
|
||||
@@ -683,14 +685,14 @@ func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvest
|
||||
Confidence string `json:"confidence"`
|
||||
}
|
||||
|
||||
fmt.Printf("🔍 Analyzing AI response type...\n")
|
||||
logging.Debug("Analyzing AI response type...")
|
||||
|
||||
if err := json.Unmarshal([]byte(aiContent), &resolutionResp); err == nil && resolutionResp.ResponseType == "resolution" {
|
||||
// This is the final resolution - show summary and complete
|
||||
fmt.Printf("\n=== DIAGNOSIS COMPLETE ===\n")
|
||||
fmt.Printf("Root Cause: %s\n", resolutionResp.RootCause)
|
||||
fmt.Printf("Resolution Plan: %s\n", resolutionResp.ResolutionPlan)
|
||||
fmt.Printf("Confidence: %s\n", resolutionResp.Confidence)
|
||||
logging.Info("=== DIAGNOSIS COMPLETE ===")
|
||||
logging.Info("Root Cause: %s", resolutionResp.RootCause)
|
||||
logging.Info("Resolution Plan: %s", resolutionResp.ResolutionPlan)
|
||||
logging.Info("Confidence: %s", resolutionResp.Confidence)
|
||||
finalAIContent = aiContent
|
||||
break
|
||||
}
|
||||
@@ -703,7 +705,7 @@ func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvest
|
||||
}
|
||||
|
||||
if err := json.Unmarshal([]byte(aiContent), &diagnosticResp); err == nil && diagnosticResp.ResponseType == "diagnostic" {
|
||||
fmt.Printf("🔄 AI requested additional diagnostics, executing...\n")
|
||||
logging.Debug("AI requested additional diagnostics, executing...")
|
||||
|
||||
// Execute additional commands if any
|
||||
additionalResults := map[string]interface{}{
|
||||
@@ -711,7 +713,7 @@ func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvest
|
||||
}
|
||||
|
||||
if len(diagnosticResp.Commands) > 0 {
|
||||
fmt.Printf("🔧 Executing %d additional diagnostic commands...\n", len(diagnosticResp.Commands))
|
||||
logging.Debug("Executing %d additional diagnostic commands", len(diagnosticResp.Commands))
|
||||
commandResults := c.executeCommandsFromPayload(diagnosticResp.Commands)
|
||||
additionalResults["command_results"] = commandResults
|
||||
}
|
||||
@@ -738,7 +740,7 @@ func (c *WebSocketClient) handlePendingInvestigation(investigation PendingInvest
|
||||
}
|
||||
|
||||
// If neither resolution nor diagnostic, treat as final response
|
||||
fmt.Printf("⚠️ Unknown response type - treating as final response\n")
|
||||
logging.Warning("Unknown response type - treating as final response")
|
||||
finalAIContent = aiContent
|
||||
break
|
||||
}
|
||||
@@ -814,21 +816,21 @@ func (c *WebSocketClient) attemptReconnection() {
|
||||
|
||||
// Only show messages after 5 consecutive failures
|
||||
if c.consecutiveFailures >= 5 {
|
||||
log.Printf("🔄 Attempting WebSocket reconnection (attempt %d/%d) - %d consecutive failures", i+1, len(backoffDurations), c.consecutiveFailures)
|
||||
logging.Info("Attempting WebSocket reconnection (attempt %d/%d) - %d consecutive failures", i+1, len(backoffDurations), c.consecutiveFailures)
|
||||
}
|
||||
|
||||
time.Sleep(backoff)
|
||||
|
||||
if err := c.connect(); err != nil {
|
||||
if c.consecutiveFailures >= 5 {
|
||||
log.Printf("❌ Reconnection attempt %d failed: %v", i+1, err)
|
||||
logging.Warning("Reconnection attempt %d failed: %v", i+1, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Successfully reconnected - reset failure counter
|
||||
if c.consecutiveFailures >= 5 {
|
||||
log.Printf("✅ WebSocket reconnected successfully after %d failures", c.consecutiveFailures)
|
||||
logging.Info("WebSocket reconnected successfully after %d failures", c.consecutiveFailures)
|
||||
}
|
||||
c.consecutiveFailures = 0
|
||||
go c.handleMessages() // Restart message handling
|
||||
@@ -836,5 +838,5 @@ func (c *WebSocketClient) attemptReconnection() {
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("❌ Failed to reconnect after %d attempts, giving up", len(backoffDurations))
|
||||
logging.Error("Failed to reconnect after %d attempts, giving up", len(backoffDurations))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user