hub / github.com/codeaashu/claude-code / endLLMRequestPerfettoSpan

Function endLLMRequestPerfettoSpan

src/utils/telemetry/perfettoTracing.ts:468–685 · view source on GitHub ↗

(
  spanId: string,
  metadata: {
    ttftMs?: number
    ttltMs?: number
    promptTokens?: number
    outputTokens?: number
    cacheReadTokens?: number
    cacheCreationTokens?: number
    messageId?: string
    success?: boolean
    error?: string
    /** Time spent in pre-request setup (client creation, retries) before the successful attempt */
    requestSetupMs?: number
    /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
    attemptStartTimes?: number[]
  },
)

Source from the content-addressed store, hash-verified

466	* End an API call span with response metadata
467	*/
468	export function endLLMRequestPerfettoSpan(
469	spanId: string,
470	metadata: {
471	ttftMs?: number
472	ttltMs?: number
473	promptTokens?: number
474	outputTokens?: number
475	cacheReadTokens?: number
476	cacheCreationTokens?: number
477	messageId?: string
478	success?: boolean
479	error?: string
480	/** Time spent in pre-request setup (client creation, retries) before the successful attempt */
481	requestSetupMs?: number
482	/** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
483	attemptStartTimes?: number[]
484	},
485	): void {
486	if (!isEnabled || !spanId) return
487
488	const pending = pendingSpans.get(spanId)
489	if (!pending) return
490
491	const endTime = getTimestamp()
492	const duration = endTime - pending.startTime
493
494	const promptTokens =
495	metadata.promptTokens ?? (pending.args.prompt_tokens as number | undefined)
496	const ttftMs = metadata.ttftMs
497	const ttltMs = metadata.ttltMs
498	const outputTokens = metadata.outputTokens
499	const cacheReadTokens = metadata.cacheReadTokens
500
501	// Compute derived metrics
502	// ITPS: input tokens per second (prompt processing speed)
503	const itps =
504	ttftMs !== undefined && promptTokens !== undefined && ttftMs > 0
505	? Math.round((promptTokens / (ttftMs / 1000)) * 100) / 100
506	: undefined
507
508	// OTPS: output tokens per second (sampling speed)
509	const samplingMs =
510	ttltMs !== undefined && ttftMs !== undefined ? ttltMs - ttftMs : undefined
511	const otps =
512	samplingMs !== undefined && outputTokens !== undefined && samplingMs > 0
513	? Math.round((outputTokens / (samplingMs / 1000)) * 100) / 100
514	: undefined
515
516	// Cache hit rate: percentage of prompt tokens from cache
517	const cacheHitRate =
518	cacheReadTokens !== undefined &&
519	promptTokens !== undefined &&
520	promptTokens > 0
521	? Math.round((cacheReadTokens / promptTokens) * 10000) / 100
522	: undefined
523
524	const requestSetupMs = metadata.requestSetupMs
525	const attemptStartTimes = metadata.attemptStartTimes

Callers 1

endLLMRequestSpan (Function) · 0.85

Calls 4

getTimestamp (Function) · 0.85

get (Method) · 0.65

delete (Method) · 0.65

push (Method) · 0.45

Tested by

no test coverage detected