( messages: Anthropic.Beta.Messages.BetaMessageParam[], tools: Anthropic.Beta.Messages.BetaToolUnion[], )
| 261 | * - Bedrock with thinking blocks: uses Sonnet (Haiku 3.5 doesn't support thinking) |
| 262 | */ |
| 263 | export async function countTokensViaHaikuFallback( |
| 264 | messages: Anthropic.Beta.Messages.BetaMessageParam[], |
| 265 | tools: Anthropic.Beta.Messages.BetaToolUnion[], |
| 266 | ): Promise<number | null> { |
| 267 | const provider = getAPIProvider() |
| 268 | if (provider === 'gemini') { |
| 269 | return roughTokenCountEstimationForAPIRequest(messages, tools) |
| 270 | } |
| 271 | |
| 272 | // Check if messages contain thinking blocks |
| 273 | const containsThinking = hasThinkingBlocks(messages) |
| 274 | |
| 275 | // If we're on Vertex and using the global region, always use Sonnet, since Haiku is not available there. |
| 276 | const isVertexGlobalEndpoint = |
| 277 | isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) && |
| 278 | getVertexRegionForModel(getSmallFastModel()) === 'global' |
| 279 | // If we're on Bedrock with thinking blocks, use Sonnet since Haiku 3.5 doesn't support thinking |
| 280 | const isBedrockWithThinking = |
| 281 | isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) && containsThinking |
| 282 | // If we're on Vertex with thinking blocks, use Sonnet since Haiku 3.5 doesn't support thinking |
| 283 | const isVertexWithThinking = |
| 284 | isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) && containsThinking |
| 285 | // Otherwise always use Haiku - Haiku 4.5 supports thinking blocks. |
| 286 | // WARNING: if you change this to use a non-Haiku model, this request will fail in 1P unless it uses getCLISyspromptPrefix. |
| 287 | // Note: We don't need Sonnet for tool_reference blocks because we strip them via |
| 288 | // stripSearchExtraToolsFieldsFromMessages() before sending. |
| 289 | // Use getSmallFastModel() to respect ANTHROPIC_SMALL_FAST_MODEL env var for Bedrock users |
| 290 | // with global inference profiles (see issue #10883). |
| 291 | const model = |
| 292 | isVertexGlobalEndpoint || isBedrockWithThinking || isVertexWithThinking |
| 293 | ? getDefaultSonnetModel() |
| 294 | : getSmallFastModel() |
| 295 | const anthropic = await getAnthropicClient({ |
| 296 | maxRetries: 1, |
| 297 | model, |
| 298 | source: 'count_tokens', |
| 299 | }) |
| 300 | |
| 301 | // Strip tool search-specific fields (caller, tool_reference) before sending |
| 302 | // These fields are only valid with the tool search beta header |
| 303 | const normalizedMessages = stripSearchExtraToolsFieldsFromMessages(messages) |
| 304 | |
| 305 | const messagesToSend: MessageParam[] = |
| 306 | normalizedMessages.length > 0 |
| 307 | ? (normalizedMessages as MessageParam[]) |
| 308 | : [{ role: 'user', content: 'count' }] |
| 309 | |
| 310 | const betas = getModelBetas(model) |
| 311 | // Filter betas for Vertex - some betas (like web-search) cause 400 errors |
| 312 | // on certain Vertex endpoints. See issue #10789. |
| 313 | const filteredBetas = |
| 314 | getAPIProvider() === 'vertex' |
| 315 | ? betas.filter(b => VERTEX_COUNT_TOKENS_ALLOWED_BETAS.has(b)) |
| 316 | : betas |
| 317 | |
| 318 | const apiStart = Date.now() |
| 319 | const langfuseTrace = isLangfuseEnabled() |
| 320 | ? createTrace({ |
no test coverage detected