Function estimateOpenAITokens

apps/sim/lib/tokenization/estimators.ts:228–251 · view source on GitHub ↗

* OpenAI-specific token estimation using BPE characteristics

(text: string)

Source from the content-addressed store, hash-verified

226	* OpenAI-specific token estimation using BPE characteristics
227	*/
function estimateOpenAITokens(text: string): number {
  // Heuristic estimate only — no real tokenizer is invoked.
  //
  // text:    raw input; leading/trailing whitespace is ignored when splitting
  //          into words, but newlines are counted on the untrimmed input.
  // returns: the estimated token count. Punctuation and newlines each add
  //          0.5, so the result may be fractional; no rounding is applied here.

  // Built once per call instead of once per word. Sharing a /g regex across
  // String.prototype.match calls is safe: match() resets lastIndex each time.
  const punctuationPattern = /[.,!?;:"'()[\]{}<>]/g

  const words = text.trim().split(/\s+/)
  let tokenCount = 0

  for (const word of words) {
    // An empty or whitespace-only input splits to [''], so skip empty entries.
    if (word.length === 0) continue

    if (word.length <= 4) {
      // Short words are counted as a single token.
      tokenCount += 1
    } else if (word.length <= 8) {
      // Medium words: roughly one token per 4.5 characters, rounded up.
      tokenCount += Math.ceil(word.length / 4.5)
    } else {
      // Long words: roughly one token per 4 characters, rounded up.
      tokenCount += Math.ceil(word.length / 4)
    }

    // Each punctuation character inside the word adds half a token on top of
    // the length-based estimate. match() returns null when nothing matches.
    const punctuationCount = (word.match(punctuationPattern) ?? []).length
    tokenCount += punctuationCount * 0.5
  }

  // Newlines are counted on the original (untrimmed) text, half a token each.
  const newlineCount = (text.match(/\n/g) ?? []).length
  tokenCount += newlineCount * 0.5

  return tokenCount
}
252
253	/**
254	* Anthropic Claude-specific token estimation

Function estimateTokenCount · 0.85

no outgoing calls

no test coverage detected