( messages: Message[], )
| 77 | * Both paths exclude cache tokens to match #304930's formula. |
| 78 | */ |
export function finalContextTokensFromLastResponse(
  messages: Message[],
): number {
  // Scan newest → oldest. The first message that carries usage data decides
  // the result; messages without usage are skipped.
  for (let idx = messages.length - 1; idx >= 0; idx--) {
    const candidate = messages[idx]
    const usage = candidate ? getTokenUsage(candidate) : undefined
    if (!usage) {
      continue
    }

    // `iterations` is not part of the Stainless-generated types yet, so widen
    // the usage object locally to read it (same cast as advisor.ts:43).
    const { iterations } = usage as {
      iterations?: Array<{
        input_tokens: number
        output_tokens: number
      }> | null
    }

    if (iterations && iterations.length > 0) {
      // Only the last iteration's numbers are used for the final window.
      const finalIteration = iterations.at(-1)!
      return finalIteration.input_tokens + finalIteration.output_tokens
    }

    // No iterations → no server tool loop → the top-level usage is itself the
    // final window. Deliberately uses the same formula as the iterations
    // branch (input + output, cache tokens excluded) instead of
    // getTokenCountFromUsage, because #304930 defines the final window as
    // non-cache input + output. Whether the server's budget countdown
    // (renderer.py:292 calculate_context_tokens) treats cache tokens the same
    // way is still an open question; until that is resolved the two branches
    // are kept consistent with each other.
    return usage.input_tokens + usage.output_tokens
  }

  // No message in the list carried usage data.
  return 0
}
| 113 | |
| 114 | /** |
| 115 | * Get only the output_tokens from the last API response. |
no test coverage detected