( text: string, maxTokens: number, modelName = 'text-embedding-3-small' )
| 96 | * Useful for handling texts that exceed model limits |
| 97 | */ |
export function truncateToTokenLimit(
  text: string,
  maxTokens: number,
  modelName = 'text-embedding-3-small'
): string {
  // text:      the input to shorten
  // maxTokens: token budget; fractional values are rounded down, and
  //            non-positive or NaN budgets yield ''
  // modelName: forwarded to getEncoding() to select the tokenizer
  // Returns `text` unchanged when it already fits, otherwise a shortened copy.

  // `!(x > 0)` also rejects NaN, which `x <= 0` would let through.
  if (!text || !(maxTokens > 0)) {
    return ''
  }

  // Token counts are whole numbers; make the rounding explicit so the log
  // message and the slice below agree on the budget actually applied.
  const tokenBudget = Math.floor(maxTokens)
  if (tokenBudget < 1) {
    return ''
  }

  try {
    const encoding = getEncoding(modelName)
    const tokens = encoding.encode(text)

    if (tokens.length <= tokenBudget) {
      return text
    }

    let truncatedText = encoding.decode(tokens.slice(0, tokenBudget))

    // A token boundary can fall inside a multi-byte character; decoders then
    // typically emit U+FFFD for the incomplete tail. A clean truncation is
    // always a prefix of the input, so trailing replacement characters that
    // break the prefix property are artifacts and are dropped. A U+FFFD that
    // genuinely appears in the input keeps the prefix property and is kept.
    while (truncatedText.endsWith('\uFFFD') && !text.startsWith(truncatedText)) {
      truncatedText = truncatedText.slice(0, -1)
    }

    logger.warn(
      `Truncated text from ${tokens.length} to ${tokenBudget} tokens (${text.length} to ${truncatedText.length} chars)`
    )

    return truncatedText
  } catch (error) {
    logger.error('Error truncating text:', error)

    // Best-effort fallback when tokenization fails: budget 4 characters per
    // token (rough heuristic) and cut on UTF-16 code units.
    let end = Math.min(tokenBudget * 4, text.length)

    // Do not cut between the two halves of a surrogate pair; a lone high
    // surrogate is not valid text.
    if (end < text.length) {
      const lastUnit = text.charCodeAt(end - 1)
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1
      }
    }

    return text.slice(0, end)
  }
}
| 129 | |
| 130 | /** |
| 131 | * Batch texts by token count to stay within API limits |
no test coverage detected