hub / github.com/codeaashu/claude-code / endLLMRequestSpan

Function endLLMRequestSpan

src/utils/telemetry/sessionTracing.ts:353–464 · view source on GitHub ↗

(
  span?: Span,
  metadata?: {
    inputTokens?: number
    outputTokens?: number
    cacheReadTokens?: number
    cacheCreationTokens?: number
    success?: boolean
    statusCode?: number
    error?: string
    attempt?: number
    modelResponse?: string
    /** Text output from the model (non-thinking content) */
    modelOutput?: string
    /** Thinking/reasoning output from the model */
    thinkingOutput?: string
    /** Whether the output included tool calls (look at tool spans for details) */
    hasToolCall?: boolean
    /** Time to first token in milliseconds */
    ttftMs?: number
    /** Time spent in pre-request setup before the successful attempt */
    requestSetupMs?: number
    /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
    attemptStartTimes?: number[]
  },
)

Source from the content-addressed store, hash-verified

351	* If not provided, falls back to finding the most recent llm_request span (legacy behavior).
352	*/
353	export function endLLMRequestSpan(
354	span?: Span,
355	metadata?: {
356	inputTokens?: number
357	outputTokens?: number
358	cacheReadTokens?: number
359	cacheCreationTokens?: number
360	success?: boolean
361	statusCode?: number
362	error?: string
363	attempt?: number
364	modelResponse?: string
365	/** Text output from the model (non-thinking content) */
366	modelOutput?: string
367	/** Thinking/reasoning output from the model */
368	thinkingOutput?: string
369	/** Whether the output included tool calls (look at tool spans for details) */
370	hasToolCall?: boolean
371	/** Time to first token in milliseconds */
372	ttftMs?: number
373	/** Time spent in pre-request setup before the successful attempt */
374	requestSetupMs?: number
375	/** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
376	attemptStartTimes?: number[]
377	},
378	): void {
379	let llmSpanContext: SpanContext \| undefined
380
381	if (span) {
382	// Use the provided span directly - this is the correct approach for parallel requests
383	const spanId = getSpanId(span)
384	llmSpanContext = activeSpans.get(spanId)?.deref()
385	} else {
386	// Legacy fallback: find the most recent llm_request span
387	// WARNING: This can cause mismatched responses when multiple requests are in flight
388	llmSpanContext = Array.from(activeSpans.values())
389	.findLast(r => {
390	const ctx = r.deref()
391	return (
392	ctx?.attributes['span.type'] === 'llm_request' \|\|
393	ctx?.attributes['model']
394	)
395	})
396	?.deref()
397	}
398
399	if (!llmSpanContext) {
400	// Span was already ended or never tracked
401	return
402	}
403
404	const duration = Date.now() - llmSpanContext.startTime
405
406	// End Perfetto span with full metadata
407	if (llmSpanContext.perfettoSpanId) {
408	endLLMRequestPerfettoSpan(llmSpanContext.perfettoSpanId, {
409	ttftMs: metadata?.ttftMs,
410	ttltMs: duration, // Time to last token is the total duration

Callers 2

logAPIError · Function · 0.85

logAPISuccessAndDuration · Function · 0.85

Calls 8

getSpanId · Function · 0.85

endLLMRequestPerfettoSpan · Function · 0.85

isAnyTracingEnabled · Function · 0.85

addBetaLLMResponseAttributes · Function · 0.85

values · Method · 0.80

get · Method · 0.65

delete · Method · 0.65

end · Method · 0.45

Tested by

no test coverage detected