MCPcopy Index your code
hub / github.com/simstudioai/sim / getTokenStrings

Function getTokenStrings

apps/sim/lib/tokenization/estimators.ts:64–92  ·  view source on GitHub ↗
(text: string, modelName = 'text-embedding-3-small')

Source from the content-addressed store, hash-verified

62 * Returns an array of token strings that can be displayed with colors
63 */
export function getTokenStrings(text: string, modelName = 'text-embedding-3-small'): string[] {
  // Empty input yields no tokens.
  if (!text || text.length === 0) {
    return []
  }

  try {
    const encoder = getEncoding(modelName)
    const ids = encoder.encode(text)

    // Work on code points (string iteration) rather than UTF-16 units so that
    // slicing never cuts a surrogate pair in half.
    const codePoints = Array.from(text)
    const pieces: string[] = []
    let consumed = 0

    // Token boundaries are recovered by decoding ever-longer prefixes of the
    // token list and measuring how many code points each prefix spans; the
    // growth since the previous prefix is attributed to the newest token.
    for (let end = 1; end <= ids.length; end++) {
      const prefix = encoder.decode(ids.slice(0, end))
      const prefixLength = Array.from(prefix).length

      // Take the piece from the original text instead of the decoded prefix.
      pieces.push(codePoints.slice(consumed, prefixLength).join(''))
      consumed = prefixLength
    }

    return pieces
  } catch (error) {
    logger.error('Error getting token strings:', error)
    // Fallback: split on whitespace runs, keeping the whitespace segments
    // themselves (capturing group) and discarding empty strings.
    return text.split(/(\s+)/).filter((s) => s.length > 0)
  }
}
93
94/**
95 * Truncate text to a maximum token count

Callers 1

ChunkEditorFunction · 0.90

Calls 4

getEncodingFunction · 0.85
joinMethod · 0.80
errorMethod · 0.80
pushMethod · 0.45

Tested by

no test coverage detected