hub / github.com/docker/docker-agent / runEvalCommand

Method runEvalCommand

cmd/root/eval.go:54–151 · view source on GitHub ↗

(cmd *cobra.Command, args []string)

Source from the content-addressed store, hash-verified

52	}
53
54	func (f evalFlags) runEvalCommand(cmd cobra.Command, args []string) (commandErr error) {
55	telemetry.TrackCommand(cmd.Context(), "eval", args)
56	defer func() { // do not inline this defer so that commandErr is not resolved early
57	telemetry.TrackCommandError(cmd.Context(), "eval", args, commandErr)
58	}()
59
60	ctx := cmd.Context()
61	agentFilename := args[0]
62	evalsDir := "./evals"
63	if len(args) >= 2 {
64	evalsDir = args[1]
65	}
66
67	// Output directory defaults to <evals-dir>/results
68	outputDir := f.outputDir
69	if outputDir == "" {
70	outputDir = filepath.Join(evalsDir, "results")
71	}
72
73	// Create output directory
74	if err := os.MkdirAll(outputDir, 0o700); err != nil {
75	return fmt.Errorf("creating output directory: %w", err)
76	}
77
78	// Generate run name upfront so we can set up logging
79	runName := evaluation.GenerateRunName()
80
81	// Set up log file with debug logging
82	logPath := filepath.Join(outputDir, runName+".log")
83	logFile, err := os.OpenFile(logPath, os.O_CREATE\|os.O_WRONLY\|os.O_TRUNC, 0o600)
84	if err != nil {
85	return fmt.Errorf("creating log file: %w", err)
86	}
87	defer logFile.Close()
88
89	// Set up slog to write debug logs to the log file
90	logHandler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
91	Level: slog.LevelDebug,
92	})
93	originalLogger := slog.Default()
94	slog.SetDefault(slog.New(logHandler))
95	defer slog.SetDefault(originalLogger)
96
97	// Write header to log file
98	fmt.Fprintf(logFile, "=== Evaluation Run: %s ===\n", runName)
99	fmt.Fprintf(logFile, "Started: %s\n", time.Now().Format(time.RFC3339))
100	fmt.Fprintf(logFile, "Agent: %s\n", agentFilename)
101	fmt.Fprintf(logFile, "Evals dir: %s\n", evalsDir)
102	fmt.Fprintf(logFile, "Judge model: %s\n", f.JudgeModel)
103	fmt.Fprintf(logFile, "Concurrency: %d\n", f.Concurrency)
104	fmt.Fprintf(logFile, "\n")
105
106	// Create tee writer to write to both console and log file
107	consoleOut := cmd.OutOrStdout()
108	teeOut := io.MultiWriter(consoleOut, logFile)
109
110	// Check if console is a TTY (for colored output)
111	isTTY := false

Callers

nothing calls this directly

Calls (12)

TrackCommand — Function · 0.92

TrackCommandError — Function · 0.92

GenerateRunName — Function · 0.92

NewDefaultRegistry — Function · 0.92

Evaluate — Function · 0.92

SaveRunSessions — Function · 0.92

SaveRunSessionsJSON — Function · 0.92

Context — Method · 0.80

Now — Method · 0.80

IsTerminal — Method · 0.80

Close — Method · 0.65

New — Method · 0.45

Tested by

no test coverage detected