(text: string, modelName = 'text-embedding-3-small')
| 62 | * Returns an array of token strings that can be displayed with colors |
| 63 | */ |
export function getTokenStrings(text: string, modelName = 'text-embedding-3-small'): string[] {
  // Empty (or missing) input has no tokens to show.
  const hasText = Boolean(text) && text.length !== 0
  if (!hasText) return []

  try {
    const encoder = getEncoding(modelName)
    const ids = encoder.encode(text)

    // Index the input by code point so a surrogate pair is never cut in half.
    const codePoints = Array.from(text)
    const pieces: string[] = []
    let consumed = 0

    // Decode ever-longer token prefixes and attribute to each token the slice
    // of the original text by which the decoded prefix grew.
    // NOTE(review): presumably prefixes are decoded (instead of single tokens)
    // because one token may end in the middle of a multi-byte character.
    for (let end = 1; end <= ids.length; end++) {
      const prefixLength = Array.from(encoder.decode(ids.slice(0, end))).length
      pieces.push(codePoints.slice(consumed, prefixLength).join(''))
      consumed = prefixLength
    }

    return pieces
  } catch (error) {
    // Best-effort fallback: split on whitespace runs, keeping the runs
    // themselves (capturing group) and dropping empty fragments.
    logger.error('Error getting token strings:', error)
    return text.split(/(\s+)/).filter((part) => part.length > 0)
  }
}
| 93 | |
| 94 | /** |
| 95 | * Truncate text to a maximum token count |
no test coverage detected