hub / github.com/CodebuffAI/codebuff / run

Method run

evals/buffbench/runners/codex.ts:14–142 · view source on GitHub ↗

(prompt: string)

Source from the content-addressed store, hash-verified

12	}
13
14	async run(prompt: string): Promise<RunnerResult> {
15	const steps: AgentStep[] = []
16	let totalCostUsd = 0
17
18	return new Promise((resolve, reject) => {
19	// Codex CLI uses the prompt as a positional argument
20	// Use exec subcommand with --full-auto for automatic execution
21	// --full-auto enables -a on-failure and --sandbox workspace-write
22	// Use --json for structured output that we can parse
23	const args = [
24	'exec',
25	'--full-auto',
26	'--json',
27	'-m',
28	'gpt-5.1-codex',
29	prompt,
30	]
31
32	console.log(`[CodexRunner] Running: codex ${args.join(' ')}`)
33
34	const child = spawn('codex', args, {
35	cwd: this.cwd,
36	env: {
37	...process.env,
38	...this.env,
39	CODEX_API_KEY: process.env.OPENAI_API_KEY \|\| this.env.OPENAI_API_KEY,
40	},
41	// Use 'ignore' for stdin to prevent the CLI from waiting for input
42	stdio: ['ignore', 'pipe', 'pipe'],
43	})
44
45	let _stdout = ''
46	let stderr = ''
47
48	child.stdout.on('data', (data: Buffer) => {
49	const chunk = data.toString()
50	_stdout += chunk
51	process.stdout.write(chunk)
52
53	// Codex outputs events as JSON lines in some modes
54	const lines = chunk.split('\n').filter((line) => line.trim())
55	for (const line of lines) {
56	try {
57	const event = JSON.parse(line)
58	if (event.type === 'message') {
59	steps.push({
60	type: 'text',
61	text: event.content \|\| event.message \|\| '',
62	})
63	} else if (
64	event.type === 'function_call' \|\|
65	event.type === 'tool'
66	) {
67	steps.push({
68	type: 'tool_call',
69	toolName: event.name \|\| event.function?.name \|\| 'unknown',
70	toolCallId: event.id \|\| `codex-${Date.now()}`,
71	input: event.arguments \|\| event.function?.arguments \|\| {},

Callers 7

evalPlannerAgent · Function · 0.45

runAgentOnCommit · Function · 0.45

analyzeAllTasks · Function · 0.45

analyzeAgentTraces · Function · 0.45

generateEvalTask · Function · 0.45

runSingleJudge · Function · 0.45

extractAgentLessons · Function · 0.45

Calls 3

parse · Method · 0.80

on · Method · 0.65

resolve · Function · 0.50

Tested by

no test coverage detected