(
span?: Span,
metadata?: {
inputTokens?: number
outputTokens?: number
cacheReadTokens?: number
cacheCreationTokens?: number
success?: boolean
statusCode?: number
error?: string
attempt?: number
modelResponse?: string
/** Text output from the model (non-thinking content) */
modelOutput?: string
/** Thinking/reasoning output from the model */
thinkingOutput?: string
/** Whether the output included tool calls (look at tool spans for details) */
hasToolCall?: boolean
/** Time to first token in milliseconds */
ttftMs?: number
/** Time spent in pre-request setup before the successful attempt */
requestSetupMs?: number
/** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
attemptStartTimes?: number[]
},
)
| 351 | * If not provided, falls back to finding the most recent llm_request span (legacy behavior). |
| 352 | */ |
| 353 | export function endLLMRequestSpan( |
| 354 | span?: Span, |
| 355 | metadata?: { |
| 356 | inputTokens?: number |
| 357 | outputTokens?: number |
| 358 | cacheReadTokens?: number |
| 359 | cacheCreationTokens?: number |
| 360 | success?: boolean |
| 361 | statusCode?: number |
| 362 | error?: string |
| 363 | attempt?: number |
| 364 | modelResponse?: string |
| 365 | /** Text output from the model (non-thinking content) */ |
| 366 | modelOutput?: string |
| 367 | /** Thinking/reasoning output from the model */ |
| 368 | thinkingOutput?: string |
| 369 | /** Whether the output included tool calls (look at tool spans for details) */ |
| 370 | hasToolCall?: boolean |
| 371 | /** Time to first token in milliseconds */ |
| 372 | ttftMs?: number |
| 373 | /** Time spent in pre-request setup before the successful attempt */ |
| 374 | requestSetupMs?: number |
| 375 | /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */ |
| 376 | attemptStartTimes?: number[] |
| 377 | }, |
| 378 | ): void { |
| 379 | let llmSpanContext: SpanContext | undefined |
| 380 | |
| 381 | if (span) { |
| 382 | // Use the provided span directly - this is the correct approach for parallel requests |
| 383 | const spanId = getSpanId(span) |
| 384 | llmSpanContext = activeSpans.get(spanId)?.deref() |
| 385 | } else { |
| 386 | // Legacy fallback: find the most recent llm_request span |
| 387 | // WARNING: This can cause mismatched responses when multiple requests are in flight |
| 388 | llmSpanContext = Array.from(activeSpans.values()) |
| 389 | .findLast(r => { |
| 390 | const ctx = r.deref() |
| 391 | return ( |
| 392 | ctx?.attributes['span.type'] === 'llm_request' || |
| 393 | ctx?.attributes['model'] |
| 394 | ) |
| 395 | }) |
| 396 | ?.deref() |
| 397 | } |
| 398 | |
| 399 | if (!llmSpanContext) { |
| 400 | // Span was already ended or never tracked |
| 401 | return |
| 402 | } |
| 403 | |
| 404 | const duration = Date.now() - llmSpanContext.startTime |
| 405 | |
| 406 | // End Perfetto span with full metadata |
| 407 | if (llmSpanContext.perfettoSpanId) { |
| 408 | endLLMRequestPerfettoSpan(llmSpanContext.perfettoSpanId, { |
| 409 | ttftMs: metadata?.ttftMs, |
| 410 | ttltMs: duration, // Time to last token is the total duration |
no test coverage detected