hub / github.com/docker/docker-agent / Evaluate

Function Evaluate

pkg/evaluation/eval.go:66–103 · view source on GitHub ↗

Evaluate runs evaluations with a specified run name. ttyOut is used for progress bar rendering (should be the console/TTY). out is used for results and status messages (can be tee'd to a log file).

(ctx context.Context, ttyOut, out io.Writer, isTTY bool, runName string, runConfig *config.RuntimeConfig, cfg Config)

Source from the content-addressed store, hash-verified

64	// ttyOut is used for progress bar rendering (should be the console/TTY).
65	// out is used for results and status messages (can be tee'd to a log file).
66	func Evaluate(ctx context.Context, ttyOut, out io.Writer, isTTY bool, runName string, runConfig config.RuntimeConfig, cfg Config) (EvalRun, error) {
67	agentSource, err := config.Resolve(cfg.AgentFilename, nil)
68	if err != nil {
69	return nil, fmt.Errorf("resolving agent: %w", err)
70	}
71
72	// Create judge model provider for relevance checking
73	judgeModel, err := createJudgeModel(ctx, cfg.JudgeModel, runConfig)
74	if err != nil {
75	return nil, err
76	}
77
78	runner := newRunner(agentSource, runConfig, judgeModel, cfg)
79
80	fmt.Fprintf(out, "Evaluation run: %s\n", runName)
81
82	startTime := time.Now()
83	results, err := runner.Run(ctx, ttyOut, out, isTTY)
84	duration := time.Since(startTime)
85
86	summary := computeSummary(results)
87	printSummary(out, summary, duration)
88
89	run := &EvalRun{
90	Name: runName,
91	Timestamp: startTime,
92	Duration: duration,
93	Config: cfg,
94	Results: results,
95	Summary: summary,
96	}
97
98	if err != nil {
99	return run, fmt.Errorf("running evaluations: %w", err)
100	}
101
102	return run, nil
103	}
104
105	// workItem represents a single evaluation to be processed.
106	type workItem struct {

Callers 1

runEvalCommandMethod · 0.92

Calls 7

ResolveFunction · 0.92

createJudgeModelFunction · 0.85

newRunnerFunction · 0.85

computeSummaryFunction · 0.85

printSummaryFunction · 0.85

NowMethod · 0.80

RunMethod · 0.65

Tested by

no test coverage detected