(rows: Array<EvalRow>, allLogs: Array<RunLogPayload>)
| 428 | } |
| 429 | |
/**
 * Annotates each row, in place, with stability metrics derived from the most
 * recent runs logged for that row's model.
 *
 * Logs are grouped by `entry.model`. For each row, the group matching
 * `row.model` is ordered by `writtenAt` descending and only the first five
 * entries are inspected. The row then receives:
 *  - `stabilitySampleSize`: how many runs were inspected (0 to 5).
 *  - `stabilityRate`: fraction of inspected runs accepted by `isStableRun`,
 *    rounded to two decimals; 0 when the model has no logged runs.
 *  - `stabilityTier`: integer in 1..5 computed as `round(rate * 4 + 1)` from
 *    the unrounded rate, so a model with no runs lands in tier 1.
 *
 * @param rows Rows to annotate; each row is mutated.
 * @param allLogs Run logs for every model; this array is not modified.
 */
function applyStability(rows: Array<EvalRow>, allLogs: Array<RunLogPayload>) {
  // Number of most-recent runs per model that feed the stability metrics.
  const windowSize = 5

  const byModel = new Map<string, Array<RunLogPayload>>()
  for (const log of allLogs) {
    const bucket = byModel.get(log.entry.model)
    if (bucket) {
      bucket.push(log)
    } else {
      byModel.set(log.entry.model, [log])
    }
  }

  // Several rows can share a model. Sort and trim each model's history once
  // and reuse the window instead of re-sorting the same list for every row.
  const recentByModel = new Map<string, Array<RunLogPayload>>()

  for (const row of rows) {
    let history = recentByModel.get(row.model)
    if (!history) {
      // Sorting in place is safe: the grouped arrays are local to this call.
      history = (byModel.get(row.model) ?? [])
        .sort((a, b) => b.writtenAt.localeCompare(a.writtenAt))
        .slice(0, windowSize)
      recentByModel.set(row.model, history)
    }

    const sampleSize = history.length
    const stableCount = history.filter(isStableRun).length
    const rate = sampleSize > 0 ? stableCount / sampleSize : 0
    // rate is within [0, 1], so the rounded value already falls in 1..5;
    // the clamp is kept as a defensive bound.
    const tier = Math.max(1, Math.min(5, Math.round(rate * 4 + 1)))

    row.stabilitySampleSize = sampleSize
    row.stabilityRate = Number(rate.toFixed(2))
    row.stabilityTier = tier
  }
}
| 451 | |
| 452 | function finalizeRows(rows: Array<EvalRow>, allLogs: Array<RunLogPayload>) { |
| 453 | applyStability(rows, allLogs) |
no test coverage detected