()
| 145 | } |
| 146 | |
/**
 * Script entry point.
 *
 * Ensures the trace directory exists, selects either the single task named by
 * the first CLI argument (an index into `TASKS`) or every task, runs each one
 * sequentially through `runTask`, and prints a pass/fail summary.
 *
 * Exits the process with code 1 when the task index is invalid or when no
 * agent with id `browser-use` is found among the loaded agents.
 */
async function main() {
  fs.mkdirSync(TRACE_DIR, { recursive: true })

  const taskIndexArg = process.argv[2]

  // Parse the index strictly: only a run of decimal digits is accepted.
  // `parseInt` would silently accept inputs such as "1abc" or "1.9" and run
  // the wrong task; anything malformed becomes NaN so the lookup below yields
  // `undefined` and is reported as an invalid index.
  const requestedIndex =
    taskIndexArg === undefined
      ? undefined
      : /^\d+$/.test(taskIndexArg)
        ? Number(taskIndexArg)
        : Number.NaN

  const tasksToRun =
    requestedIndex !== undefined
      ? [{ task: TASKS[requestedIndex], index: requestedIndex }]
      : TASKS.map((task, index) => ({ task, index }))

  // An out-of-range or malformed index resolves to a missing task.
  if (tasksToRun.some((t) => !t.task)) {
    console.error(`Invalid task index: ${taskIndexArg}. Available: 0-${TASKS.length - 1}`)
    process.exit(1)
  }

  // Agent definitions are loaded from the `agents` directory under the
  // current working directory.
  const agents = await loadLocalAgents({ agentsPath: path.join(process.cwd(), 'agents'), verbose: true })
  const agentDefinitions = Object.values(agents) as AgentDefinition[]

  const browserAgent = agentDefinitions.find((a) => a.id === 'browser-use')
  if (!browserAgent) {
    console.error('browser-use agent not found in agents/ directory')
    process.exit(1)
  }
  console.log(`Loaded browser-use agent (model: ${browserAgent.model})`)

  const client = new CodebuffClient({
    apiKey: process.env.CODEBUFF_API_KEY,
    cwd: process.cwd(),
  })

  const results: Array<{ name: string; success: boolean; traceFile: string }> = []

  // Tasks run one at a time, in order; each result is recorded for the summary.
  for (const { task, index } of tasksToRun) {
    const result = await runTask(client, task, agentDefinitions, index)
    results.push({ name: task.name, success: result.success, traceFile: result.traceFile })
  }

  console.log(`\n${'='.repeat(60)}`)
  console.log('SUMMARY')
  console.log(`${'='.repeat(60)}`)
  for (const r of results) {
    console.log(` ${r.success ? '✅' : '❌'} ${r.name} → ${r.traceFile}`)
  }
  const passed = results.filter((r) => r.success).length
  console.log(`\n${passed}/${results.length} tasks passed`)
}
| 192 | |
| 193 | if (import.meta.main) { |
| 194 | main().catch((err) => { |
no test coverage detected