Function main

evals/subagents/eval-planner.ts:223–396 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

221	}
222
223	async function main() {
224	// Load the eval file
225	const evalFilePath = path.join(
226	__dirname,
227	'..',
228	'git-evals',
229	'eval-codebuff2.json',
230	)
231	const evalData: EvalData = JSON.parse(fs.readFileSync(evalFilePath, 'utf-8'))
232
233	const { repoUrl, initCommand, evalCommits } = evalData
234
235	const client = new CodebuffClient({
236	apiKey: process.env[API_KEY_ENV_VAR] || getUserCredentials()?.authToken,
237	})
238
239	const agentsPath = path.join(__dirname, '../../.agents')
240	const localAgentDefinitions = Object.values(
241	await loadLocalAgents({
242	agentsPath,
243	}),
244	)
245
246	const allResults = [] as Array<{
247	sha: string
248	spec: string
249	agentOutput: string
250	judgingResults: {
251	reasoning: string
252	pros: string
253	cons: string
254	overallScore: number
255	}
256	plannerLatencyMs: number
257	}>
258
259	// Track statistics
260	const stats = {
261	total: evalCommits.length,
262	completed: 0,
263	failed: 0,
264	scores: [] as number[],
265	plannerLatencies: [] as number[],
266	}
267
268	// Loop through each eval task
269	for (const evalCommit of evalCommits) {
270	const { sha, parentSha, spec, fileStates } = evalCommit
271
272	console.log(`\n=== Running eval for commit ${sha} ===`)
273	console.log(`Spec: ${spec.substring(0, 100)}...\n`)
274
275	try {
276	const result = await evalPlannerAgent({
277	client,
278	agentId: 'implementation-planner',
279	agentDefinitions: localAgentDefinitions,
280	spec,

eval-planner.tsFile · 0.70

getUserCredentialsFunction · 0.90

loadLocalAgentsFunction · 0.90

evalPlannerAgentFunction · 0.85

parseMethod · 0.80

no test coverage detected