(cmd *cobra.Command, args []string)
| 52 | } |
| 53 | |
| 54 | func (f *evalFlags) runEvalCommand(cmd *cobra.Command, args []string) (commandErr error) { |
| 55 | telemetry.TrackCommand(cmd.Context(), "eval", args) |
| 56 | defer func() { // do not inline this defer so that commandErr is not resolved early |
| 57 | telemetry.TrackCommandError(cmd.Context(), "eval", args, commandErr) |
| 58 | }() |
| 59 | |
| 60 | ctx := cmd.Context() |
| 61 | agentFilename := args[0] |
| 62 | evalsDir := "./evals" |
| 63 | if len(args) >= 2 { |
| 64 | evalsDir = args[1] |
| 65 | } |
| 66 | |
| 67 | // Output directory defaults to <evals-dir>/results |
| 68 | outputDir := f.outputDir |
| 69 | if outputDir == "" { |
| 70 | outputDir = filepath.Join(evalsDir, "results") |
| 71 | } |
| 72 | |
| 73 | // Create output directory |
| 74 | if err := os.MkdirAll(outputDir, 0o700); err != nil { |
| 75 | return fmt.Errorf("creating output directory: %w", err) |
| 76 | } |
| 77 | |
| 78 | // Generate run name upfront so we can set up logging |
| 79 | runName := evaluation.GenerateRunName() |
| 80 | |
| 81 | // Set up log file with debug logging |
| 82 | logPath := filepath.Join(outputDir, runName+".log") |
| 83 | logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600) |
| 84 | if err != nil { |
| 85 | return fmt.Errorf("creating log file: %w", err) |
| 86 | } |
| 87 | defer logFile.Close() |
| 88 | |
| 89 | // Set up slog to write debug logs to the log file |
| 90 | logHandler := slog.NewTextHandler(logFile, &slog.HandlerOptions{ |
| 91 | Level: slog.LevelDebug, |
| 92 | }) |
| 93 | originalLogger := slog.Default() |
| 94 | slog.SetDefault(slog.New(logHandler)) |
| 95 | defer slog.SetDefault(originalLogger) |
| 96 | |
| 97 | // Write header to log file |
| 98 | fmt.Fprintf(logFile, "=== Evaluation Run: %s ===\n", runName) |
| 99 | fmt.Fprintf(logFile, "Started: %s\n", time.Now().Format(time.RFC3339)) |
| 100 | fmt.Fprintf(logFile, "Agent: %s\n", agentFilename) |
| 101 | fmt.Fprintf(logFile, "Evals dir: %s\n", evalsDir) |
| 102 | fmt.Fprintf(logFile, "Judge model: %s\n", f.JudgeModel) |
| 103 | fmt.Fprintf(logFile, "Concurrency: %d\n", f.Concurrency) |
| 104 | fmt.Fprintf(logFile, "\n") |
| 105 | |
| 106 | // Create tee writer to write to both console and log file |
| 107 | consoleOut := cmd.OutOrStdout() |
| 108 | teeOut := io.MultiWriter(consoleOut, logFile) |
| 109 | |
| 110 | // Check if console is a TTY (for colored output) |
| 111 | isTTY := false |
Nothing calls this directly.
No test coverage detected.