Function estimateTokenCount

apps/sim/lib/tokenization/estimators.ts:186–223 · view source on GitHub ↗

(text: string, providerId?: string)

Source from the content-addressed store, hash-verified

184	* Estimates token count for text using provider-specific heuristics
185	*/
export function estimateTokenCount(text: string, providerId?: string): TokenEstimate {
  // Empty or too-short input is not estimated: report zero tokens via the
  // 'fallback' method, labelled with the caller's provider id (or 'unknown').
  const tooShortToEstimate = !text || text.length < MIN_TEXT_LENGTH_FOR_ESTIMATION
  if (tooShortToEstimate) {
    return {
      count: 0,
      confidence: 'high',
      provider: providerId || 'unknown',
      method: 'fallback',
    }
  }

  // A missing (or empty) provider id falls back to the configured default.
  const resolvedProvider = providerId || TOKENIZATION_CONFIG.defaults.provider
  const providerConfig = getProviderConfig(resolvedProvider)

  // Dispatch to the provider-specific heuristic; any provider without a
  // dedicated estimator uses the generic one with the configured chars/token.
  let rawEstimate: number
  if (resolvedProvider === 'openai' || resolvedProvider === 'azure-openai') {
    rawEstimate = estimateOpenAITokens(text)
  } else if (resolvedProvider === 'anthropic' || resolvedProvider === 'azure-anthropic') {
    rawEstimate = estimateAnthropicTokens(text)
  } else if (resolvedProvider === 'google') {
    rawEstimate = estimateGoogleTokens(text)
  } else {
    rawEstimate = estimateGenericTokens(text, providerConfig.avgCharsPerToken)
  }

  // Round to whole tokens and never report fewer than one for estimated text.
  const roundedCount = Math.max(1, Math.round(rawEstimate))

  return {
    count: roundedCount,
    confidence: providerConfig.confidence,
    provider: resolvedProvider,
    method: 'heuristic',
  }
}
224
225	/**
226	* OpenAI-specific token estimation using BPE characteristics

calculateStreamingCost (Function) · 0.90

callEmbeddingAPI (Function) · 0.90

recordSearchEmbeddingUsage (Function) · 0.90

processDocumentAsync (Function) · 0.90

createChunk (Function) · 0.90

updateChunk (Function) · 0.90

route.ts (File) · 0.90

estimateInputTokens (Function) · 0.85

estimateOutputTokens (Function) · 0.85

getProviderConfig (Function) · 0.90

estimateOpenAITokens (Function) · 0.85

estimateAnthropicTokens (Function) · 0.85

estimateGoogleTokens (Function) · 0.85

estimateGenericTokens (Function) · 0.85

no test coverage detected