(results)
| 121 | } |
| 122 | |
/**
 * Builds one summary record per entry of MODES from a flat list of run results.
 *
 * For every mode, the results whose `mode` property equals that mode are
 * selected, their token usage, duration, verdict counters and context sizes
 * are summed, and the sums are turned into averages and ratios.
 *
 * @param {Array<object>} results - Run results. Each one is read for `mode`,
 *   `verdict`, `usage.{inputTokens,outputTokens,totalTokens}`, `durationMs`
 *   and, optionally, `context.{chars,lines,approxTokens}`.
 * @returns {object} Map from mode to its summary, with keys inserted in MODES
 *   order.
 */
function summarizeModes(results) {
  const summary = {};

  for (const mode of MODES) {
    const runsForMode = results.filter((entry) => entry.mode === mode);

    // Running sums for this mode; every counter starts at zero.
    const sums = {
      inputTokens: 0,
      outputTokens: 0,
      totalTokens: 0,
      durationMs: 0,
      completed: 0,
      specFacts: 0,
      driftedFacts: 0,
      acceptanceCriteriaTotal: 0,
      acceptanceCriteriaMet: 0,
      contextChars: 0,
      contextLines: 0,
      contextApproxTokens: 0,
    };

    for (const entry of runsForMode) {
      // The verdict is normalized first, before any other field is read.
      const verdict = normalizeVerdict(entry.verdict);
      const usage = entry.usage;
      const context = entry.context;

      sums.inputTokens += usage.inputTokens;
      sums.outputTokens += usage.outputTokens;
      sums.totalTokens += usage.totalTokens;
      sums.durationMs += entry.durationMs;

      if (verdict.completed) {
        sums.completed += 1;
      }
      sums.specFacts += verdict.specFacts;
      sums.driftedFacts += verdict.driftedFacts;
      sums.acceptanceCriteriaTotal += verdict.acceptanceCriteriaTotal;
      sums.acceptanceCriteriaMet += verdict.acceptanceCriteriaMet;

      // Context is optional: a missing object or field contributes zero.
      sums.contextChars += context?.chars ?? 0;
      sums.contextLines += context?.lines ?? 0;
      sums.contextApproxTokens += context?.approxTokens ?? 0;
    }

    const runCount = runsForMode.length;
    // A mode without runs divides by 1 instead of 0, so its averages are 0.
    const divisor = runCount || 1;
    // A run counts as parsed when its raw `verdict` is truthy.
    const parsedRuns = runsForMode.filter((entry) => entry.verdict).length;
    const average = (sum) => round(sum / divisor);

    summary[mode] = {
      runs: runCount,
      avgInputTokens: average(sums.inputTokens),
      avgOutputTokens: average(sums.outputTokens),
      avgTotalTokens: average(sums.totalTokens),
      avgDurationMs: average(sums.durationMs),
      parseSuccessRate: safeRatio(parsedRuns, divisor),
      completionSuccessRate: safeRatio(sums.completed, divisor),
      specDriftRate: safeRatio(sums.driftedFacts, sums.specFacts),
      taskCompletionRate: safeRatio(sums.acceptanceCriteriaMet, sums.acceptanceCriteriaTotal),
      avgContextChars: average(sums.contextChars),
      avgContextLines: average(sums.contextLines),
      avgContextApproxTokens: average(sums.contextApproxTokens),
    };
  }

  return summary;
}
| 177 | |
| 178 | function savings(byMode, totalKey, inputKey) { |
| 179 | const totalTokenSavings = byMode.off[totalKey] - byMode.beta[totalKey]; |
no test coverage detected