(
spanId: string,
metadata: {
ttftMs?: number
ttltMs?: number
promptTokens?: number
outputTokens?: number
cacheReadTokens?: number
cacheCreationTokens?: number
messageId?: string
success?: boolean
error?: string
/** Time spent in pre-request setup (client creation, retries) before the successful attempt */
requestSetupMs?: number
/** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
attemptStartTimes?: number[]
},
)
| 466 | * End an API call span with response metadata |
| 467 | */ |
| 468 | export function endLLMRequestPerfettoSpan( |
| 469 | spanId: string, |
| 470 | metadata: { |
| 471 | ttftMs?: number |
| 472 | ttltMs?: number |
| 473 | promptTokens?: number |
| 474 | outputTokens?: number |
| 475 | cacheReadTokens?: number |
| 476 | cacheCreationTokens?: number |
| 477 | messageId?: string |
| 478 | success?: boolean |
| 479 | error?: string |
| 480 | /** Time spent in pre-request setup (client creation, retries) before the successful attempt */ |
| 481 | requestSetupMs?: number |
| 482 | /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */ |
| 483 | attemptStartTimes?: number[] |
| 484 | }, |
| 485 | ): void { |
| 486 | if (!isEnabled || !spanId) return |
| 487 | |
| 488 | const pending = pendingSpans.get(spanId) |
| 489 | if (!pending) return |
| 490 | |
| 491 | const endTime = getTimestamp() |
| 492 | const duration = endTime - pending.startTime |
| 493 | |
| 494 | const promptTokens = |
| 495 | metadata.promptTokens ?? (pending.args.prompt_tokens as number | undefined) |
| 496 | const ttftMs = metadata.ttftMs |
| 497 | const ttltMs = metadata.ttltMs |
| 498 | const outputTokens = metadata.outputTokens |
| 499 | const cacheReadTokens = metadata.cacheReadTokens |
| 500 | |
| 501 | // Compute derived metrics |
| 502 | // ITPS: input tokens per second (prompt processing speed) |
| 503 | const itps = |
| 504 | ttftMs !== undefined && promptTokens !== undefined && ttftMs > 0 |
| 505 | ? Math.round((promptTokens / (ttftMs / 1000)) * 100) / 100 |
| 506 | : undefined |
| 507 | |
| 508 | // OTPS: output tokens per second (sampling speed) |
| 509 | const samplingMs = |
| 510 | ttltMs !== undefined && ttftMs !== undefined ? ttltMs - ttftMs : undefined |
| 511 | const otps = |
| 512 | samplingMs !== undefined && outputTokens !== undefined && samplingMs > 0 |
| 513 | ? Math.round((outputTokens / (samplingMs / 1000)) * 100) / 100 |
| 514 | : undefined |
| 515 | |
| 516 | // Cache hit rate: percentage of prompt tokens from cache |
| 517 | const cacheHitRate = |
| 518 | cacheReadTokens !== undefined && |
| 519 | promptTokens !== undefined && |
| 520 | promptTokens > 0 |
| 521 | ? Math.round((cacheReadTokens / promptTokens) * 10000) / 100 |
| 522 | : undefined |
| 523 | |
| 524 | const requestSetupMs = metadata.requestSetupMs |
| 525 | const attemptStartTimes = metadata.attemptStartTimes |
no test coverage detected