Function truncateToTokenLimit

apps/sim/lib/tokenization/estimators.ts:98–128 · view source on GitHub ↗

(
  text: string,
  maxTokens: number,
  modelName = 'text-embedding-3-small'
)

Source from the content-addressed store, hash-verified

96	* Useful for handling texts that exceed model limits
97	*/
export function truncateToTokenLimit(
  text: string,
  maxTokens: number,
  modelName = 'text-embedding-3-small'
): string {
  // Contract:
  // - Empty text or a non-positive limit yields ''.
  // - Text that already fits within `maxTokens` tokens (per the encoding
  //   resolved for `modelName`) is returned unchanged.
  // - Otherwise the first `maxTokens` tokens are decoded and returned.
  // - If tokenization throws, falls back to a character cut at 4 chars/token.
  // The result never ends in half of a character that was split by the cut.
  if (!text || maxTokens <= 0) {
    return ''
  }

  try {
    const encoding = getEncoding(modelName)
    const tokens = encoding.encode(text)

    if (tokens.length <= maxTokens) {
      return text
    }

    const truncatedTokens = tokens.slice(0, maxTokens)
    let truncatedText = encoding.decode(truncatedTokens)

    // A token boundary can fall inside a multi-byte character; the decoder then
    // ends the output with U+FFFD. Strip it only while the decoded text is not
    // a prefix of the input, so a U+FFFD that really exists in `text` is kept.
    while (truncatedText.endsWith('\uFFFD') && !text.startsWith(truncatedText)) {
      truncatedText = truncatedText.slice(0, -1)
    }

    logger.warn(
      `Truncated text from ${tokens.length} to ${maxTokens} tokens (${text.length} to ${truncatedText.length} chars)`
    )

    return truncatedText
  } catch (error) {
    logger.error('Error truncating text:', error)

    // Fallback heuristic: budget 4 characters per token.
    const maxChars = maxTokens * 4
    const sliced = text.slice(0, maxChars)

    // `slice` counts UTF-16 code units, so the cut may land between the two
    // halves of a surrogate pair. Drop the dangling high surrogate in that case.
    const lastUnit = sliced.charCodeAt(sliced.length - 1)
    const nextUnit = text.charCodeAt(sliced.length)
    const splitsSurrogatePair =
      lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff

    return splitsSurrogatePair ? sliced.slice(0, -1) : sliced
  }
}
129
130	/**
131	* Batch texts by token count to stay within API limits

batchByTokenLimitFunction · 0.85

getEncodingFunction · 0.85

errorMethod · 0.80

warnMethod · 0.65

no test coverage detected