MCPcopy Index your code
hub / github.com/simstudioai/sim / estimateTokenCount

Function estimateTokenCount

apps/sim/lib/tokenization/estimators.ts:186–223  ·  view source on GitHub ↗
(text: string, providerId?: string)

Source from the content-addressed store, hash-verified

184 * Estimates token count for text using provider-specific heuristics
185 */
/**
 * Produces a heuristic token estimate for `text`, choosing the estimator by provider.
 *
 * @param text - The text to estimate. When it is falsy or shorter than
 *   `MIN_TEXT_LENGTH_FOR_ESTIMATION`, a zero-count `'fallback'` result is returned.
 * @param providerId - Optional provider identifier. When falsy, the provider from
 *   `TOKENIZATION_CONFIG.defaults.provider` is used for estimation (the fallback
 *   result reports `'unknown'` instead).
 * @returns A `TokenEstimate` carrying the count, the confidence, the provider that
 *   was used, and the method (`'fallback'` or `'heuristic'`).
 */
export function estimateTokenCount(text: string, providerId?: string): TokenEstimate {
  // Guard: nothing (or too little) to estimate, so report zero tokens.
  const isBelowThreshold = !text || text.length < MIN_TEXT_LENGTH_FOR_ESTIMATION
  if (isBelowThreshold) {
    return {
      count: 0,
      confidence: 'high',
      provider: providerId || 'unknown',
      method: 'fallback',
    }
  }

  const resolvedProvider = providerId || TOKENIZATION_CONFIG.defaults.provider
  const providerConfig = getProviderConfig(resolvedProvider)

  // Dispatch to the provider-specific estimator; any other provider uses the
  // generic estimator with the configured average characters per token.
  let rawEstimate: number
  if (resolvedProvider === 'openai' || resolvedProvider === 'azure-openai') {
    rawEstimate = estimateOpenAITokens(text)
  } else if (resolvedProvider === 'anthropic' || resolvedProvider === 'azure-anthropic') {
    rawEstimate = estimateAnthropicTokens(text)
  } else if (resolvedProvider === 'google') {
    rawEstimate = estimateGoogleTokens(text)
  } else {
    rawEstimate = estimateGenericTokens(text, providerConfig.avgCharsPerToken)
  }

  // Round to a whole number and never report fewer than one token here.
  const wholeTokens = Math.max(1, Math.round(rawEstimate))

  return {
    count: wholeTokens,
    confidence: providerConfig.confidence,
    provider: resolvedProvider,
    method: 'heuristic',
  }
}
224
225/**
226 * OpenAI-specific token estimation using BPE characteristics

Callers 9

calculateStreamingCostFunction · 0.90
callEmbeddingAPIFunction · 0.90
processDocumentAsyncFunction · 0.90
createChunkFunction · 0.90
updateChunkFunction · 0.90
route.tsFile · 0.90
estimateInputTokensFunction · 0.85
estimateOutputTokensFunction · 0.85

Calls 5

getProviderConfigFunction · 0.90
estimateOpenAITokensFunction · 0.85
estimateAnthropicTokensFunction · 0.85
estimateGoogleTokensFunction · 0.85
estimateGenericTokensFunction · 0.85

Tested by

no test coverage detected