Function generateEmbeddings

apps/sim/lib/knowledge/embeddings.ts:353–394 · view source on GitHub ↗

(
  texts: string[],
  embeddingModel: string = DEFAULT_EMBEDDING_MODEL,
  workspaceId?: string | null
)

Source from the content-addressed store, hash-verified

351	* Generate embeddings for multiple texts with token-aware batching and parallel processing.
352	*/
353	export async function generateEmbeddings(
354	texts: string[],
355	embeddingModel: string = DEFAULT_EMBEDDING_MODEL,
356	workspaceId?: string \| null
357	): Promise<GenerateEmbeddingsResult> {
358	const provider = await resolveProvider(embeddingModel, workspaceId)
359
360	const tokenBatches = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel)
361	const batches = provider.maxItemsPerRequest
362	? tokenBatches.flatMap((batch) => splitByItemLimit(batch, provider.maxItemsPerRequest!))
363	: tokenBatches
364
365	const batchResults = await processWithConcurrency(
366	batches,
367	MAX_CONCURRENT_BATCHES,
368	async (batch, i) => {
369	try {
370	return await callEmbeddingAPI(batch, provider, 'document')
371	} catch (error) {
372	logger.error(`Failed to generate embeddings for batch ${i + 1}/${batches.length}:`, error)
373	throw error
374	}
375	}
376	)
377
378	const allEmbeddings: number[][] = []
379	let totalTokens = 0
380	for (const batch of batchResults) {
381	for (const emb of batch.embeddings) {
382	allEmbeddings.push(emb)
383	}
384	totalTokens += batch.totalTokens
385	}
386
387	return {
388	embeddings: allEmbeddings,
389	totalTokens,
390	isBYOK: provider.isBYOK,
391	modelName: provider.modelName,
392	pricingId: provider.pricingId,
393	}
394	}
395
396	/**
397	* Generate embedding for a single search query.

processDocumentAsyncFunction · 0.90

createChunkFunction · 0.90

updateChunkFunction · 0.90

chunkMdxFileMethod · 0.90

utils.test.tsFile · 0.90

batchByTokenLimitFunction · 0.90

resolveProviderFunction · 0.85

splitByItemLimitFunction · 0.85

processWithConcurrencyFunction · 0.85

callEmbeddingAPIFunction · 0.85

errorMethod · 0.80

pushMethod · 0.45

no test coverage detected