(results []Result)
| 63 | } |
| 64 | |
| 65 | func computeSummary(results []Result) Summary { |
| 66 | summary := Summary{ |
| 67 | TotalEvals: len(results), |
| 68 | } |
| 69 | |
| 70 | for _, r := range results { |
| 71 | summary.TotalCost += r.Cost |
| 72 | if r.Error != "" { |
| 73 | summary.FailedEvals += 1 |
| 74 | continue |
| 75 | } |
| 76 | |
| 77 | if r.SizeExpected != "" { |
| 78 | summary.SizesTotal++ |
| 79 | if r.SizeExpected == r.Size { |
| 80 | summary.SizesPassed++ |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | if r.ToolCallsExpected > 0 { |
| 85 | summary.ToolsF1Sum += r.ToolCallsScore |
| 86 | summary.ToolsCount++ |
| 87 | } |
| 88 | |
| 89 | summary.RelevanceTotal += r.RelevanceExpected |
| 90 | summary.RelevancePassed += r.RelevancePassed |
| 91 | } |
| 92 | |
| 93 | return summary |
| 94 | } |
| 95 | |
| 96 | // printSummary outputs the evaluation summary to the writer. |
| 97 | func printSummary(out io.Writer, summary Summary, duration time.Duration) { |
no outgoing calls