MCPcopy
hub / github.com/docker/docker-agent / runEvalCommand

Method runEvalCommand

cmd/root/eval.go:54–151  ·  view source on GitHub ↗
(cmd *cobra.Command, args []string)

Source from the content-addressed store, hash-verified

52}
53
54func (f *evalFlags) runEvalCommand(cmd *cobra.Command, args []string) (commandErr error) {
55 telemetry.TrackCommand(cmd.Context(), "eval", args)
56 defer func() { // do not inline this defer so that commandErr is not resolved early
57 telemetry.TrackCommandError(cmd.Context(), "eval", args, commandErr)
58 }()
59
60 ctx := cmd.Context()
61 agentFilename := args[0]
62 evalsDir := "./evals"
63 if len(args) >= 2 {
64 evalsDir = args[1]
65 }
66
67 // Output directory defaults to <evals-dir>/results
68 outputDir := f.outputDir
69 if outputDir == "" {
70 outputDir = filepath.Join(evalsDir, "results")
71 }
72
73 // Create output directory
74 if err := os.MkdirAll(outputDir, 0o700); err != nil {
75 return fmt.Errorf("creating output directory: %w", err)
76 }
77
78 // Generate run name upfront so we can set up logging
79 runName := evaluation.GenerateRunName()
80
81 // Set up log file with debug logging
82 logPath := filepath.Join(outputDir, runName+".log")
83 logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
84 if err != nil {
85 return fmt.Errorf("creating log file: %w", err)
86 }
87 defer logFile.Close()
88
89 // Set up slog to write debug logs to the log file
90 logHandler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
91 Level: slog.LevelDebug,
92 })
93 originalLogger := slog.Default()
94 slog.SetDefault(slog.New(logHandler))
95 defer slog.SetDefault(originalLogger)
96
97 // Write header to log file
98 fmt.Fprintf(logFile, "=== Evaluation Run: %s ===\n", runName)
99 fmt.Fprintf(logFile, "Started: %s\n", time.Now().Format(time.RFC3339))
100 fmt.Fprintf(logFile, "Agent: %s\n", agentFilename)
101 fmt.Fprintf(logFile, "Evals dir: %s\n", evalsDir)
102 fmt.Fprintf(logFile, "Judge model: %s\n", f.JudgeModel)
103 fmt.Fprintf(logFile, "Concurrency: %d\n", f.Concurrency)
104 fmt.Fprintf(logFile, "\n")
105
106 // Create tee writer to write to both console and log file
107 consoleOut := cmd.OutOrStdout()
108 teeOut := io.MultiWriter(consoleOut, logFile)
109
110 // Check if console is a TTY (for colored output)
111 isTTY := false

Callers

nothing calls this directly

Calls 12

TrackCommand · Function · 0.92
TrackCommandError · Function · 0.92
GenerateRunName · Function · 0.92
NewDefaultRegistry · Function · 0.92
Evaluate · Function · 0.92
SaveRunSessions · Function · 0.92
SaveRunSessionsJSON · Function · 0.92
Context · Method · 0.80
Now · Method · 0.80
IsTerminal · Method · 0.80
Close · Method · 0.65
New · Method · 0.45

Tested by

no test coverage detected