RunTrace starts a network trace on a node. It creates a privileged pod on the target node, runs bpftrace, and streams output.
Signature: `func RunTrace(ctx context.Context, config TraceConfig, nodeName, debugPodNamespace string) error`
| 64 | // RunTrace starts a network trace on a node. |
| 65 | // It creates a privileged pod on the target node, runs bpftrace, and streams output. |
| 66 | func RunTrace(ctx context.Context, config TraceConfig, nodeName, debugPodNamespace string) error { |
| 67 | clientset, err := kubernetes.NewForConfig(config.RestConfig) |
| 68 | if err != nil { |
| 69 | return fmt.Errorf("error constructing kube clientset: %w", err) |
| 70 | } |
| 71 | |
| 72 | // Validate node OS |
| 73 | err = validateOperatingSystemSupportedForNode(ctx, clientset, nodeName) |
| 74 | if err != nil { |
| 75 | return fmt.Errorf("error validating operating system for node %s: %w", nodeName, err) |
| 76 | } |
| 77 | |
| 78 | // Create the trace pod |
| 79 | pod := hostNetworkPodForTrace(config, debugPodNamespace, nodeName) |
| 80 | |
| 81 | fmt.Printf("Creating trace pod %s/%s on node %s\n", debugPodNamespace, pod.Name, nodeName) |
| 82 | createdPod, err := clientset.CoreV1(). |
| 83 | Pods(debugPodNamespace). |
| 84 | Create(ctx, pod, metav1.CreateOptions{}) |
| 85 | if err != nil { |
| 86 | return fmt.Errorf("error creating trace pod %s in namespace %s: %w", pod.Name, debugPodNamespace, err) |
| 87 | } |
| 88 | |
| 89 | // Ensure cleanup on exit (Ctrl-C, error, or normal termination) |
| 90 | // Note: intentionally using context.Background() for cleanup so it runs even if ctx is canceled |
| 91 | defer func() { //nolint:contextcheck // cleanup must run regardless of parent context state |
| 92 | fmt.Printf("Cleaning up trace pod %s/%s\n", debugPodNamespace, createdPod.Name) |
| 93 | deleteCtx := context.Background() // Use fresh context for cleanup |
| 94 | deleteErr := clientset.CoreV1(). |
| 95 | Pods(debugPodNamespace). |
| 96 | Delete(deleteCtx, createdPod.Name, metav1.DeleteOptions{}) |
| 97 | if deleteErr != nil { |
| 98 | fmt.Fprintf(os.Stderr, "warning: failed to delete trace pod %s: %v\n", createdPod.Name, deleteErr) |
| 99 | } |
| 100 | }() |
| 101 | |
| 102 | // Wait for pod to be running |
| 103 | err = waitForContainerRunning(ctx, config.Timeout, clientset, debugPodNamespace, createdPod.Name, createdPod.Spec.Containers[0].Name) |
| 104 | if err != nil { |
| 105 | return fmt.Errorf("error waiting for trace pod to start: %w", err) |
| 106 | } |
| 107 | |
| 108 | fmt.Printf("Trace pod ready, starting trace...\n") |
| 109 | |
| 110 | // First, fetch and display reason/state codes from kernel |
| 111 | // These are kernel-version specific so we read them at runtime |
| 112 | fmt.Printf("\n") |
| 113 | |
| 114 | // Display SKB drop reason codes (for DROP events) |
| 115 | dropReasonsCommand := DropReasonsCommand() |
| 116 | err = execInPod(ctx, config.RestConfig, clientset, debugPodNamespace, createdPod.Name, createdPod.Spec.Containers[0].Name, dropReasonsCommand, os.Stdout, os.Stderr) |
| 117 | if err != nil { |
| 118 | // Non-fatal: continue even if we can't get reason codes |
| 119 | fmt.Fprintf(os.Stderr, "warning: could not fetch drop reason codes: %v\n", err) |
| 120 | } |
| 121 | fmt.Printf("\n") |
| 122 | |
| 123 | // Generate and run the bpftrace script |
No test coverage detected.