(model: string)
| 3397 | } |
| 3398 | |
/**
 * Resolves the max-output-token budget to request for `model`.
 *
 * The model's own default is used as the starting point, optionally lowered
 * by the slot-reservation cap, and then handed to the
 * CLAUDE_CODE_MAX_OUTPUT_TOKENS env-var validator as its default, bounded by
 * the model's upper limit.
 *
 * @param model - Model identifier passed to `getModelMaxOutputTokens`.
 * @returns The `effective` value reported by `validateBoundedIntEnvVar`.
 */
export function getMaxOutputTokensForModel(model: string): number {
  const limits = getModelMaxOutputTokens(model)

  // Slot-reservation cap: when enabled, the default drops to 8k for every
  // model. BQ p99 output is 4,911 tokens, so 32k/64k defaults over-reserve
  // slot capacity by 8-16×. A request that hits the cap gets one clean retry
  // at 64k (query.ts max_output_tokens_escalate).
  let fallbackTokens: number
  if (isMaxTokensCapEnabled()) {
    // Math.min leaves models whose native default is already lower
    // (e.g. claude-3-opus at 4k) at that native value.
    fallbackTokens = Math.min(limits.default, CAPPED_DEFAULT_MAX_TOKENS)
  } else {
    fallbackTokens = limits.default
  }

  // The cap is computed before the env-var lookup on purpose, so that
  // CLAUDE_CODE_MAX_OUTPUT_TOKENS still wins over it.
  const { effective } = validateBoundedIntEnvVar(
    'CLAUDE_CODE_MAX_OUTPUT_TOKENS',
    process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS,
    fallbackTokens,
    limits.upperLimit,
  )
  return effective
}
| 3420 |
no test coverage detected