( messages: Anthropic.Beta.Messages.BetaMessageParam[], tools: Anthropic.Beta.Messages.BetaToolUnion[], )
| 249 | * - Bedrock with thinking blocks: uses Sonnet (Haiku 3.5 doesn't support thinking) |
| 250 | */ |
| 251 | export async function countTokensViaHaikuFallback( |
| 252 | messages: Anthropic.Beta.Messages.BetaMessageParam[], |
| 253 | tools: Anthropic.Beta.Messages.BetaToolUnion[], |
| 254 | ): Promise<number | null> { |
| 255 | // Check if messages contain thinking blocks |
| 256 | const containsThinking = hasThinkingBlocks(messages) |
| 257 | |
| 258 | // If we're on Vertex and using the global region, always use Sonnet since Haiku is not available there. |
| 259 | const isVertexGlobalEndpoint = |
| 260 | isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) && |
| 261 | getVertexRegionForModel(getSmallFastModel()) === 'global' |
| 262 | // If we're on Bedrock with thinking blocks, use Sonnet since Haiku 3.5 doesn't support thinking |
| 263 | const isBedrockWithThinking = |
| 264 | isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) && containsThinking |
| 265 | // If we're on Vertex with thinking blocks, use Sonnet since Haiku 3.5 doesn't support thinking |
| 266 | const isVertexWithThinking = |
| 267 | isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) && containsThinking |
| 268 | // Otherwise always use Haiku - Haiku 4.5 supports thinking blocks. |
| 269 | // WARNING: if you change this to use a non-Haiku model, this request will fail in 1P unless it uses getCLISyspromptPrefix. |
| 270 | // Note: We don't need Sonnet for tool_reference blocks because we strip them via |
| 271 | // stripToolSearchFieldsFromMessages() before sending. |
| 272 | // Use getSmallFastModel() to respect ANTHROPIC_SMALL_FAST_MODEL env var for Bedrock users |
| 273 | // with global inference profiles (see issue #10883). |
| 274 | const model = |
| 275 | isVertexGlobalEndpoint || isBedrockWithThinking || isVertexWithThinking |
| 276 | ? getDefaultSonnetModel() |
| 277 | : getSmallFastModel() |
| 278 | const anthropic = await getAnthropicClient({ |
| 279 | maxRetries: 1, |
| 280 | model, |
| 281 | source: 'count_tokens', |
| 282 | }) |
| 283 | |
| 284 | // Strip tool search-specific fields (caller, tool_reference) before sending |
| 285 | // These fields are only valid with the tool search beta header |
| 286 | const normalizedMessages = stripToolSearchFieldsFromMessages(messages) |
| 287 | |
| 288 | const messagesToSend: MessageParam[] = |
| 289 | normalizedMessages.length > 0 |
| 290 | ? (normalizedMessages as MessageParam[]) |
| 291 | : [{ role: 'user', content: 'count' }] |
| 292 | |
| 293 | const betas = getModelBetas(model) |
| 294 | // Filter betas for Vertex - some betas (like web-search) cause 400 errors |
| 295 | // on certain Vertex endpoints. See issue #10789. |
| 296 | const filteredBetas = |
| 297 | getAPIProvider() === 'vertex' |
| 298 | ? betas.filter(b => VERTEX_COUNT_TOKENS_ALLOWED_BETAS.has(b)) |
| 299 | : betas |
| 300 | |
| 301 | // biome-ignore lint/plugin: token counting needs specialized parameters (thinking, betas) that sideQuery doesn't support |
| 302 | const response = await anthropic.beta.messages.create({ |
| 303 | model: normalizeModelStringForAPI(model), |
| 304 | max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1, |
| 305 | messages: messagesToSend, |
| 306 | tools: tools.length > 0 ? tools : undefined, |
| 307 | ...(filteredBetas.length > 0 && { betas: filteredBetas }), |
| 308 | metadata: getAPIMetadata(), |
no test coverage detected