MCPcopy
hub / github.com/janhq/jan / buildEmbedBatches

Function buildEmbedBatches

extensions/llamacpp-extension/src/util.ts:230–280  ·  view source on GitHub ↗
(
  inputs: string[],
  ubatchSize: number,
  charsPerToken = DEFAULT_CHARS_PER_TOKEN
)

Source from the content-addressed store, hash-verified

228}
229
/**
 * Splits `inputs` into consecutive batches whose estimated token totals stay
 * within a safety-scaled fraction of `ubatchSize`.
 *
 * The per-batch budget is `Math.floor(ubatchSize * UBATCH_SAFETY_MARGIN)`.
 * An input whose own estimate exceeds that budget is passed through
 * `truncateToTokenBudget` before it is placed. Input order is preserved and
 * each input is placed in exactly one batch.
 *
 * @param inputs - Texts to batch, in the order they should appear in the output.
 * @param ubatchSize - Size the per-batch token budget is derived from.
 * @param charsPerToken - Characters-per-token ratio forwarded to the token
 *   estimator and to the truncation helper.
 * @returns Batches in input order; each `offset` is the index within `inputs`
 *   of that batch's first text. Empty when `inputs` is empty.
 * @throws Error when `ubatchSize` is NaN or is smaller than
 *   `Math.ceil(1 / UBATCH_SAFETY_MARGIN)`.
 */
export function buildEmbedBatches(
  inputs: string[],
  ubatchSize: number,
  charsPerToken = DEFAULT_CHARS_PER_TOKEN
): EmbedBatch[] {
  // NaN compares false against everything, so it would slip past the minimum
  // check below and yield a NaN budget that disables truncation and splitting.
  if (Number.isNaN(ubatchSize)) {
    throw new Error(`ubatch_size (${ubatchSize}) is not a valid number`)
  }

  // Presumably guards against the floored budget below collapsing to zero.
  const minUbatchSize = Math.ceil(1 / UBATCH_SAFETY_MARGIN)
  if (ubatchSize < minUbatchSize) {
    throw new Error(
      `ubatch_size (${ubatchSize}) is too small. Minimum required: ${minUbatchSize}`
    )
  }

  const safeLimit = Math.floor(ubatchSize * UBATCH_SAFETY_MARGIN)

  const batches: EmbedBatch[] = []
  let current: string[] = []
  let currentTokens = 0
  let offset = 0

  // Seals the in-progress batch. It is a no-op while the batch is empty, which
  // is what guarantees that no empty batch is ever emitted.
  const flush = () => {
    if (current.length === 0) return
    batches.push({ batch: current, offset })
    offset += current.length
    current = []
    currentTokens = 0
  }

  for (const raw of inputs) {
    // Estimate once; only re-estimate when the text actually had to be cut.
    let text = raw
    let estTokens = estimateTokensFromText(raw, charsPerToken)
    if (estTokens > safeLimit) {
      text = truncateToTokenBudget(raw, safeLimit, charsPerToken)
      estTokens = estimateTokensFromText(text, charsPerToken)
    }

    // Start a new batch when adding this text would exceed the budget. A text
    // is always accepted into an empty batch, so every input is placed.
    if (current.length > 0 && currentTokens + estTokens > safeLimit) {
      flush()
    }

    current.push(text)
    currentTokens += estTokens
  }

  flush()

  return batches
}
281
282export function mergeEmbedResponses(
283 model: string,

Callers 2

embed (Method) · 0.90
util.test.ts (File) · 0.90

Calls 4

estimateTokensFromText (Function) · 0.85
truncateToTokenBudget (Function) · 0.85
push (Function) · 0.85
push (Method) · 0.45

Tested by

no test coverage detected