MCPcopy
hub / github.com/microsoft/retina / RunTrace

Function RunTrace

shell/trace.go:66–142  ·  view source on GitHub ↗

RunTrace starts a network trace on a node. It creates a privileged pod on the target node, runs bpftrace, and streams output.

func RunTrace(ctx context.Context, config TraceConfig, nodeName, debugPodNamespace string) error

Source from the content-addressed store, hash-verified

64// RunTrace starts a network trace on a node.
65// It creates a privileged pod on the target node, runs bpftrace, and streams output.
66func RunTrace(ctx context.Context, config TraceConfig, nodeName, debugPodNamespace string) error {
67 clientset, err := kubernetes.NewForConfig(config.RestConfig)
68 if err != nil {
69 return fmt.Errorf("error constructing kube clientset: %w", err)
70 }
71
72 // Validate node OS
73 err = validateOperatingSystemSupportedForNode(ctx, clientset, nodeName)
74 if err != nil {
75 return fmt.Errorf("error validating operating system for node %s: %w", nodeName, err)
76 }
77
78 // Create the trace pod
79 pod := hostNetworkPodForTrace(config, debugPodNamespace, nodeName)
80
81 fmt.Printf("Creating trace pod %s/%s on node %s\n", debugPodNamespace, pod.Name, nodeName)
82 createdPod, err := clientset.CoreV1().
83 Pods(debugPodNamespace).
84 Create(ctx, pod, metav1.CreateOptions{})
85 if err != nil {
86 return fmt.Errorf("error creating trace pod %s in namespace %s: %w", pod.Name, debugPodNamespace, err)
87 }
88
89 // Ensure cleanup on exit (Ctrl-C, error, or normal termination)
90 // Note: intentionally using context.Background() for cleanup so it runs even if ctx is canceled
91 defer func() { //nolint:contextcheck // cleanup must run regardless of parent context state
92 fmt.Printf("Cleaning up trace pod %s/%s\n", debugPodNamespace, createdPod.Name)
93 deleteCtx := context.Background() // Use fresh context for cleanup
94 deleteErr := clientset.CoreV1().
95 Pods(debugPodNamespace).
96 Delete(deleteCtx, createdPod.Name, metav1.DeleteOptions{})
97 if deleteErr != nil {
98 fmt.Fprintf(os.Stderr, "warning: failed to delete trace pod %s: %v\n", createdPod.Name, deleteErr)
99 }
100 }()
101
102 // Wait for pod to be running
103 err = waitForContainerRunning(ctx, config.Timeout, clientset, debugPodNamespace, createdPod.Name, createdPod.Spec.Containers[0].Name)
104 if err != nil {
105 return fmt.Errorf("error waiting for trace pod to start: %w", err)
106 }
107
108 fmt.Printf("Trace pod ready, starting trace...\n")
109
110 // First, fetch and display reason/state codes from kernel
111 // These are kernel-version specific so we read them at runtime
112 fmt.Printf("\n")
113
114 // Display SKB drop reason codes (for DROP events)
115 dropReasonsCommand := DropReasonsCommand()
116 err = execInPod(ctx, config.RestConfig, clientset, debugPodNamespace, createdPod.Name, createdPod.Spec.Containers[0].Name, dropReasonsCommand, os.Stdout, os.Stderr)
117 if err != nil {
118 // Non-fatal: continue even if we can't get reason codes
119 fmt.Fprintf(os.Stderr, "warning: could not fetch drop reason codes: %v\n", err)
120 }
121 fmt.Printf("\n")
122
123 // Generate and run the bpftrace script

Callers 1

runBpftrace (Function) · 0.92

Calls 10

Generate (Method) · 0.95
hostNetworkPodForTrace (Function) · 0.85
waitForContainerRunning (Function) · 0.85
DropReasonsCommand (Function) · 0.85
execInPod (Function) · 0.85
NewScriptGenerator (Function) · 0.85
Delete (Method) · 0.65
Err (Method) · 0.65
Create (Method) · 0.45

Tested by

no test coverage detected