({
client,
traces,
codingAgentPrompt,
analyzerContext,
}: {
client: CodebuffClient
traces: AgentTraceData[]
codingAgentPrompt: string
analyzerContext: {
agentDefinitions: any[]
agentTypeDefinition: string
testedAgentIds: string[]
}
})
| 108 | } |
| 109 | |
| 110 | export async function analyzeAgentTraces({ |
| 111 | client, |
| 112 | traces, |
| 113 | codingAgentPrompt, |
| 114 | analyzerContext, |
| 115 | }: { |
| 116 | client: CodebuffClient |
| 117 | traces: AgentTraceData[] |
| 118 | codingAgentPrompt: string |
| 119 | analyzerContext: { |
| 120 | agentDefinitions: any[] |
| 121 | agentTypeDefinition: string |
| 122 | testedAgentIds: string[] |
| 123 | } |
| 124 | }): Promise<{ |
| 125 | overallAnalysis: string |
| 126 | agentFeedback: Array<{ |
| 127 | agentId: string |
| 128 | strengths: string[] |
| 129 | weaknesses: string[] |
| 130 | recommendations: string[] |
| 131 | }> |
| 132 | }> { |
| 133 | try { |
| 134 | const truncatedTraces = traces.map((t) => ({ |
| 135 | agentId: t.agentId, |
| 136 | trace: truncateTrace(t.trace), |
| 137 | judgeResult: t.judgeResult, |
| 138 | cost: t.cost, |
| 139 | durationMs: t.durationMs, |
| 140 | error: t.error, |
| 141 | })) |
| 142 | |
| 143 | // Filter agent definitions to only include tested agents |
| 144 | const filteredAgentDefinitions = analyzerContext.agentDefinitions.filter( |
| 145 | (def) => analyzerContext.testedAgentIds.includes(def.id), |
| 146 | ) |
| 147 | |
| 148 | const prompt = `## Agent Definitions Being Evaluated |
| 149 | |
| 150 | Below are the complete agent definitions for the agents being tested. Use this to understand their configuration, tools, prompts, and capabilities. |
| 151 | |
| 152 | ${JSON.stringify(filteredAgentDefinitions, null, 2)} |
| 153 | |
| 154 | ## Agent Type Definition Reference |
| 155 | |
| 156 | For reference, here is the TypeScript type definition that agents use: |
| 157 | |
| 158 | \`\`\`typescript |
| 159 | ${analyzerContext.agentTypeDefinition} |
| 160 | \`\`\` |
| 161 | |
| 162 | ## Coding Agent Prompt (for context) |
| 163 | ${codingAgentPrompt} |
| 164 | |
| 165 | ## Agent Traces and Results |
| 166 | ${JSON.stringify(truncatedTraces, null, 2)} |
| 167 |
No test coverage was detected for the excerpt above.