MCPcopy Index your code
hub / github.com/simstudioai/sim / estimateOpenAITokens

Function estimateOpenAITokens

apps/sim/lib/tokenization/estimators.ts:228–251  ·  view source on GitHub ↗

* OpenAI-specific token estimation using BPE characteristics

(text: string)

Source from the content-addressed store, hash-verified

226 * OpenAI-specific token estimation using BPE characteristics
227 */
function estimateOpenAITokens(text: string): number {
  // Heuristic estimate only: no tokenizer is invoked. The result is a sum of
  // whole per-word counts plus 0.5 increments for punctuation and newlines, so
  // it may be fractional (a multiple of 0.5). Rounding is left to the caller.

  // Built once per call instead of once per word. String.prototype.match
  // resets lastIndex on global regexes, so reusing them across words is safe.
  const punctuationPattern = /[.,!?;:"'()[\]{}<>]/g
  const newlinePattern = /\n/g

  // Whitespace-delimited chunks. Empty or whitespace-only text yields [''],
  // which the length check below skips.
  const words = text.trim().split(/\s+/)
  let tokenCount = 0

  for (const word of words) {
    if (word.length === 0) continue

    if (word.length <= 4) {
      // Short words are counted as a single token.
      tokenCount += 1
    } else if (word.length <= 8) {
      // Medium words: roughly one token per 4.5 characters, rounded up.
      tokenCount += Math.ceil(word.length / 4.5)
    } else {
      // Long words: roughly one token per 4 characters, rounded up.
      tokenCount += Math.ceil(word.length / 4)
    }

    // Each punctuation character adds half a token on top of the length-based
    // count (punctuation is already included in word.length).
    const punctuationCount = (word.match(punctuationPattern) || []).length
    tokenCount += punctuationCount * 0.5
  }

  // Newlines are counted on the untrimmed text, so leading/trailing newlines
  // still contribute half a token each.
  const newlineCount = (text.match(newlinePattern) || []).length
  tokenCount += newlineCount * 0.5

  return tokenCount
}
252
253/**
254 * Anthropic Claude-specific token estimation

Callers 1

estimateTokenCountFunction · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected