Function finalizeRows

packages/ai-code-mode/models-eval/run-eval.ts:452–489 · view source on GitHub ↗

(rows: Array<EvalRow>, allLogs: Array<RunLogPayload>)

Source from the content-addressed store, hash-verified

450	}
451
/**
 * Fills in the derived fields of every evaluation row, mutating `rows` in place.
 *
 * The steps run in a fixed order:
 *  1. `applyStability` is called with the rows and the full set of run logs.
 *  2. `bucketedTiers` is called twice: once keyed on `durationMs`, storing the
 *     resulting tier in `speedTier`, and once keyed on `tokenEfficiency`,
 *     storing the resulting tier in `tokenEfficiencyTier`.
 *  3. Every row that carries a `judge` result receives `stars` and
 *     `weightedScore` from `computeStarRating`. Rows without a judge result are
 *     left untouched by this step.
 *
 * @param rows - Evaluation rows to complete; modified in place.
 * @param allLogs - Run log payloads forwarded to `applyStability`.
 */
function finalizeRows(rows: Array<EvalRow>, allLogs: Array<RunLogPayload>) {
  applyStability(rows, allLogs)

  // NOTE(review): the meaning of the trailing `true` is defined by
  // bucketedTiers, which is not visible here — confirm against its signature.
  bucketedTiers(
    rows,
    (entry) => entry.durationMs,
    (entry, tier) => {
      entry.speedTier = tier
    },
    true,
  )
  bucketedTiers(
    rows,
    (entry) => entry.tokenEfficiency,
    (entry, tier) => {
      entry.tokenEfficiencyTier = tier
    },
    true,
  )

  // The tiers assigned above feed the rating, so this pass must come last.
  for (const entry of rows) {
    const verdict = entry.judge
    if (verdict) {
      const { stars, weightedScore } = computeStarRating({
        accuracy: verdict.accuracy,
        comprehensiveness: verdict.comprehensiveness,
        typescriptQuality: verdict.typescriptQuality,
        codeModeEfficiency: verdict.codeModeEfficiency,
        speedTier: entry.speedTier,
        tokenEfficiencyTier: entry.tokenEfficiencyTier,
        stabilityTier: entry.stabilityTier,
        compilationFailures: entry.compilationFailures,
        runtimeFailures: entry.runtimeFailures,
        totalExecuteCalls: entry.totalExecuteCalls,
      })
      entry.stars = stars
      entry.weightedScore = weightedScore
    }
  }
}
490
491	function listRunLogFiles(): Array<string> {
492	if (!existsSync(LOG_DIR)) return []

judgeLatestSession · Function · 0.85

main · Function · 0.85

computeStarRating · Function · 0.90

applyStability · Function · 0.85

bucketedTiers · Function · 0.85

no test coverage detected