MCPcopy Index your code
hub / github.com/simstudioai/sim / generateEmbeddings

Function generateEmbeddings

apps/sim/lib/knowledge/embeddings.ts:353–394  ·  view source on GitHub ↗
(
  texts: string[],
  embeddingModel: string = DEFAULT_EMBEDDING_MODEL,
  workspaceId?: string | null
)

Source from the content-addressed store, hash-verified

/**
 * Produce document embeddings for a list of texts using token-aware batching
 * and concurrent requests.
 *
 * The texts are grouped by `batchByTokenLimit` against `MAX_TOKENS_PER_REQUEST`.
 * If the resolved provider exposes a truthy `maxItemsPerRequest`, every group is
 * additionally divided by `splitByItemLimit`. The resulting batches are handed to
 * `processWithConcurrency` with `MAX_CONCURRENT_BATCHES`.
 *
 * @param texts - Texts to embed.
 * @param embeddingModel - Model identifier; defaults to `DEFAULT_EMBEDDING_MODEL`.
 * @param workspaceId - Optional workspace id forwarded to `resolveProvider`.
 * @returns The embeddings of all batches concatenated in result order, the summed
 *   token count, and the provider's `isBYOK`, `modelName` and `pricingId`.
 * @throws Re-throws any error raised while embedding a batch, after logging it.
 */
export async function generateEmbeddings(
  texts: string[],
  embeddingModel: string = DEFAULT_EMBEDDING_MODEL,
  workspaceId?: string | null
): Promise<GenerateEmbeddingsResult> {
  const provider = await resolveProvider(embeddingModel, workspaceId)

  const byTokens = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel)

  // Only apply the per-request item cap when the provider declares one.
  const itemLimit = provider.maxItemsPerRequest
  const requestBatches = itemLimit
    ? byTokens.flatMap((group) => splitByItemLimit(group, itemLimit))
    : byTokens

  const responses = await processWithConcurrency(
    requestBatches,
    MAX_CONCURRENT_BATCHES,
    async (group, index) => {
      try {
        return await callEmbeddingAPI(group, provider, 'document')
      } catch (err) {
        // Log which batch failed, then let the failure propagate to the caller.
        logger.error(
          `Failed to generate embeddings for batch ${index + 1}/${requestBatches.length}:`,
          err
        )
        throw err
      }
    }
  )

  // Flatten per-batch vectors into one list and add up the token usage.
  const embeddings: number[][] = responses.flatMap((response) => response.embeddings)
  const totalTokens = responses.reduce((sum, response) => sum + response.totalTokens, 0)

  return {
    embeddings,
    totalTokens,
    isBYOK: provider.isBYOK,
    modelName: provider.modelName,
    pricingId: provider.pricingId,
  }
}
395
396/**
397 * Generate embedding for a single search query.

Callers 5

processDocumentAsync (Function) · 0.90
createChunk (Function) · 0.90
updateChunk (Function) · 0.90
chunkMdxFile (Method) · 0.90
utils.test.ts (File) · 0.90

Calls 7

batchByTokenLimit (Function) · 0.90
resolveProvider (Function) · 0.85
splitByItemLimit (Function) · 0.85
processWithConcurrency (Function) · 0.85
callEmbeddingAPI (Function) · 0.85
error (Method) · 0.80
push (Method) · 0.45

Tested by

no test coverage detected