MCPcopy Index your code
hub / github.com/codeaashu/claude-code / endLLMRequestSpan

Function endLLMRequestSpan

src/utils/telemetry/sessionTracing.ts:353–464  ·  view source on GitHub ↗
(
  span?: Span,
  metadata?: {
    inputTokens?: number
    outputTokens?: number
    cacheReadTokens?: number
    cacheCreationTokens?: number
    success?: boolean
    statusCode?: number
    error?: string
    attempt?: number
    modelResponse?: string
    /** Text output from the model (non-thinking content) */
    modelOutput?: string
    /** Thinking/reasoning output from the model */
    thinkingOutput?: string
    /** Whether the output included tool calls (look at tool spans for details) */
    hasToolCall?: boolean
    /** Time to first token in milliseconds */
    ttftMs?: number
    /** Time spent in pre-request setup before the successful attempt */
    requestSetupMs?: number
    /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
    attemptStartTimes?: number[]
  },
)

Source from the content-addressed store, hash-verified

351 * If not provided, falls back to finding the most recent llm_request span (legacy behavior).
352 */
353export function endLLMRequestSpan(
354 span?: Span,
355 metadata?: {
356 inputTokens?: number
357 outputTokens?: number
358 cacheReadTokens?: number
359 cacheCreationTokens?: number
360 success?: boolean
361 statusCode?: number
362 error?: string
363 attempt?: number
364 modelResponse?: string
365 /** Text output from the model (non-thinking content) */
366 modelOutput?: string
367 /** Thinking/reasoning output from the model */
368 thinkingOutput?: string
369 /** Whether the output included tool calls (look at tool spans for details) */
370 hasToolCall?: boolean
371 /** Time to first token in milliseconds */
372 ttftMs?: number
373 /** Time spent in pre-request setup before the successful attempt */
374 requestSetupMs?: number
375 /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
376 attemptStartTimes?: number[]
377 },
378): void {
379 let llmSpanContext: SpanContext | undefined
380
381 if (span) {
382 // Use the provided span directly - this is the correct approach for parallel requests
383 const spanId = getSpanId(span)
384 llmSpanContext = activeSpans.get(spanId)?.deref()
385 } else {
386 // Legacy fallback: find the most recent llm_request span
387 // WARNING: This can cause mismatched responses when multiple requests are in flight
388 llmSpanContext = Array.from(activeSpans.values())
389 .findLast(r => {
390 const ctx = r.deref()
391 return (
392 ctx?.attributes['span.type'] === 'llm_request' ||
393 ctx?.attributes['model']
394 )
395 })
396 ?.deref()
397 }
398
399 if (!llmSpanContext) {
400 // Span was already ended or never tracked
401 return
402 }
403
404 const duration = Date.now() - llmSpanContext.startTime
405
406 // End Perfetto span with full metadata
407 if (llmSpanContext.perfettoSpanId) {
408 endLLMRequestPerfettoSpan(llmSpanContext.perfettoSpanId, {
409 ttftMs: metadata?.ttftMs,
410 ttltMs: duration, // Time to last token is the total duration

Callers 2

logAPIError · Function · 0.85
logAPISuccessAndDuration · Function · 0.85

Calls 8

getSpanId · Function · 0.85
isAnyTracingEnabled · Function · 0.85
values · Method · 0.80
get · Method · 0.65
delete · Method · 0.65
end · Method · 0.45

Tested by

no test coverage detected