(text: string, providerId?: string)
| 184 | * Estimates token count for text using provider-specific heuristics |
| 185 | */ |
export function estimateTokenCount(text: string, providerId?: string): TokenEstimate {
  // Empty input, or input shorter than MIN_TEXT_LENGTH_FOR_ESTIMATION, is not
  // estimated: report zero tokens via the 'fallback' method, echoing the
  // caller's provider id (or 'unknown' when it is missing or empty).
  const isTooShort = !text || text.length < MIN_TEXT_LENGTH_FOR_ESTIMATION
  if (isTooShort) {
    return { count: 0, confidence: 'high', provider: providerId || 'unknown', method: 'fallback' }
  }

  // A missing or empty provider id resolves to TOKENIZATION_CONFIG.defaults.provider.
  const provider = providerId || TOKENIZATION_CONFIG.defaults.provider
  // The provider config is looked up up-front; it supplies the reported
  // confidence and, for the generic path, the average characters per token.
  const providerConfig = getProviderConfig(provider)

  // Dispatch to the provider-specific heuristic; Azure-hosted variants share
  // the estimator of the underlying provider.
  let rawEstimate: number
  if (provider === 'openai' || provider === 'azure-openai') {
    rawEstimate = estimateOpenAITokens(text)
  } else if (provider === 'anthropic' || provider === 'azure-anthropic') {
    rawEstimate = estimateAnthropicTokens(text)
  } else if (provider === 'google') {
    rawEstimate = estimateGoogleTokens(text)
  } else {
    // Any other provider id uses the generic chars-per-token estimate.
    rawEstimate = estimateGenericTokens(text, providerConfig.avgCharsPerToken)
  }

  // Round to a whole number and never report fewer than one token for text
  // that passed the length check above.
  const count = Math.max(1, Math.round(rawEstimate))

  return {
    count,
    confidence: providerConfig.confidence,
    provider,
    method: 'heuristic',
  }
}
| 224 | |
| 225 | /** |
| 226 | * OpenAI-specific token estimation using BPE characteristics |
no test coverage detected