SaveRunSessionsJSON saves the full evaluation run output to a JSON file. The output includes run metadata (config, summary) and all sessions with their eval criteria and scoring results (pass/fail, judge reasoning, errors).
(run *EvalRun, outputDir string)
| 379 | // The output includes run metadata (config, summary) and all sessions with |
| 380 | // their eval criteria and scoring results (pass/fail, judge reasoning, errors). |
| 381 | func SaveRunSessionsJSON(run *EvalRun, outputDir string) (string, error) { |
| 382 | // Populate eval results on each session |
| 383 | for i := range run.Results { |
| 384 | populateEvalResult(&run.Results[i]) |
| 385 | } |
| 386 | |
| 387 | // Collect all sessions from results |
| 388 | var sessions []*session.Session |
| 389 | for i := range run.Results { |
| 390 | if run.Results[i].Session != nil { |
| 391 | sessions = append(sessions, run.Results[i].Session) |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | output := RunOutput{ |
| 396 | Name: run.Name, |
| 397 | Timestamp: run.Timestamp, |
| 398 | Duration: run.Duration.Round(time.Millisecond).String(), |
| 399 | Config: RunOutputConfig{ |
| 400 | Agent: run.Config.AgentFilename, |
| 401 | JudgeModel: run.Config.JudgeModel, |
| 402 | Concurrency: run.Config.Concurrency, |
| 403 | EvalsDir: run.Config.EvalsDir, |
| 404 | BaseImage: run.Config.BaseImage, |
| 405 | }, |
| 406 | Summary: run.Summary, |
| 407 | Sessions: sessions, |
| 408 | } |
| 409 | |
| 410 | outputPath := filepath.Join(outputDir, run.Name+".json") |
| 411 | return saveJSON(output, outputPath) |
| 412 | } |
| 413 | |
| 414 | // populateEvalResult copies scoring data from a Result to its Session's EvalResult field. |
| 415 | func populateEvalResult(result *Result) { |