| 53 | } |
| 54 | |
| 55 | async function runTask( |
| 56 | client: CodebuffClient, |
| 57 | task: TaskDefinition, |
| 58 | agentDefinitions: AgentDefinition[], |
| 59 | taskIndex: number, |
| 60 | ): Promise<{ success: boolean; traceFile: string; output: unknown }> { |
| 61 | const events: TraceEvent[] = [] |
| 62 | const startTime = Date.now() |
| 63 | |
| 64 | console.log(`\n${'='.repeat(60)}`) |
| 65 | console.log(`Task ${taskIndex}: ${task.name}`) |
| 66 | console.log(`Prompt: ${task.prompt}`) |
| 67 | console.log(`${'='.repeat(60)}\n`) |
| 68 | |
| 69 | const runState = await client.run({ |
| 70 | agent: 'browser-use', |
| 71 | prompt: task.prompt, |
| 72 | params: task.url ? { url: task.url } : undefined, |
| 73 | agentDefinitions, |
| 74 | maxAgentSteps: 30, |
| 75 | handleEvent: (event) => { |
| 76 | events.push({ |
| 77 | timestamp: new Date().toISOString(), |
| 78 | type: event.type, |
| 79 | data: event as Record<string, unknown>, |
| 80 | }) |
| 81 | |
| 82 | if (event.type === 'text') { |
| 83 | process.stdout.write(event.text ?? '') |
| 84 | } else if (event.type === 'tool_call') { |
| 85 | console.log(`\n[Tool Call] ${event.toolName}`) |
| 86 | } else if (event.type === 'tool_result') { |
| 87 | const preview = JSON.stringify(event.output)?.slice(0, 200) |
| 88 | console.log(`[Tool Result] ${preview}...`) |
| 89 | } else if (event.type === 'error') { |
| 90 | console.error(`[Error] ${event.message}`) |
| 91 | } else if (event.type === 'subagent_start') { |
| 92 | console.log(`[Subagent Start] ${event.agentType}`) |
| 93 | } else if (event.type === 'subagent_finish') { |
| 94 | console.log(`[Subagent Finish] ${event.agentType}`) |
| 95 | } |
| 96 | }, |
| 97 | }) |
| 98 | |
| 99 | const duration = ((Date.now() - startTime) / 1000).toFixed(1) |
| 100 | const output = runState.output |
| 101 | |
| 102 | const trace = { |
| 103 | task: { |
| 104 | name: task.name, |
| 105 | prompt: task.prompt, |
| 106 | url: task.url, |
| 107 | }, |
| 108 | duration: `${duration}s`, |
| 109 | output, |
| 110 | eventCount: events.length, |
| 111 | events, |
| 112 | } |