( texts: string[], embeddingModel: string = DEFAULT_EMBEDDING_MODEL, workspaceId?: string | null )
| 351 | * Generate embeddings for multiple texts with token-aware batching and parallel processing. |
| 352 | */ |
export async function generateEmbeddings(
  texts: string[],
  embeddingModel: string = DEFAULT_EMBEDDING_MODEL,
  workspaceId?: string | null
): Promise<GenerateEmbeddingsResult> {
  const provider = await resolveProvider(embeddingModel, workspaceId)

  // Pass 1: group the texts via batchByTokenLimit against MAX_TOKENS_PER_REQUEST.
  const tokenLimited = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel)

  // Pass 2: when the provider exposes a truthy per-request item cap, re-split
  // every token batch with splitByItemLimit. A missing (or zero) cap leaves the
  // token batches untouched. Reading the cap into a const lets the closure see
  // the narrowed value without a non-null assertion.
  const itemLimit = provider.maxItemsPerRequest
  const requestBatches = itemLimit
    ? tokenLimited.flatMap((group) => splitByItemLimit(group, itemLimit))
    : tokenLimited

  // Embed each batch as a 'document' input through processWithConcurrency,
  // bounded by MAX_CONCURRENT_BATCHES. A failing batch is logged with its
  // 1-based position and then rethrown, so the whole call rejects.
  const results = await processWithConcurrency(
    requestBatches,
    MAX_CONCURRENT_BATCHES,
    async (group, position) => {
      try {
        return await callEmbeddingAPI(group, provider, 'document')
      } catch (error) {
        logger.error(
          `Failed to generate embeddings for batch ${position + 1}/${requestBatches.length}:`,
          error
        )
        throw error
      }
    }
  )

  // Flatten the per-batch vectors in result order and sum the token usage.
  // NOTE(review): output order matching `texts` relies on processWithConcurrency
  // returning results in batch order — confirm against its implementation.
  const embeddings: number[][] = results.flatMap((result) => result.embeddings)
  const totalTokens = results.reduce((sum, result) => sum + result.totalTokens, 0)

  return {
    embeddings,
    totalTokens,
    isBYOK: provider.isBYOK,
    modelName: provider.modelName,
    pricingId: provider.pricingId,
  }
}
| 395 | |
| 396 | /** |
| 397 | * Generate embedding for a single search query. |
no test coverage detected