( inputs: string[], provider: ResolvedProvider, inputType: EmbeddingInputType )
| 257 | } |
| 258 | |
/**
 * Sends a single embedding request for `inputs` to the resolved provider and
 * returns the parsed vectors together with the token count for the call.
 *
 * The attempt is run through `retryWithExponentialBackoff`, configured with
 * `maxRetries: 3`, `initialDelayMs: 1000` and `maxDelayMs: 10000`. An
 * `EmbeddingAPIError` is considered retryable only for HTTP 429 or a status of
 * 500 and above; every other error is delegated to `isRetryableError`.
 *
 * @param inputs - Texts sent to the provider in one request.
 * @param provider - Supplies `buildRequest` and the tokenizer used for the
 *   token-count fallback.
 * @param inputType - Forwarded unchanged to `provider.buildRequest`.
 * @returns The embeddings produced by `request.parse` and the total token
 *   count: `usage.total_tokens` from the response when present, otherwise an
 *   estimate summed over `inputs`.
 * @throws EmbeddingAPIError when the response status is not OK.
 */
async function callEmbeddingAPI(
  inputs: string[],
  provider: ResolvedProvider,
  inputType: EmbeddingInputType
): Promise<{ embeddings: number[][]; totalTokens: number }> {
  return retryWithExponentialBackoff(
    async () => {
      const request = provider.buildRequest(inputs, inputType)

      const controller = new AbortController()
      const timeout = setTimeout(() => controller.abort(), EMBEDDING_REQUEST_TIMEOUT_MS)

      try {
        // The abort signal must stay armed until the body has been consumed:
        // fetch() resolves as soon as headers arrive, so clearing the timer at
        // that point would leave response.text()/response.json() unbounded.
        const response = await fetch(request.apiUrl, {
          method: 'POST',
          headers: request.headers,
          body: JSON.stringify(request.body),
          signal: controller.signal,
        })

        if (!response.ok) {
          const errorText = await response.text()
          throw new EmbeddingAPIError(
            `Embedding API failed: ${response.status} ${response.statusText} - ${errorText}`,
            response.status
          )
        }

        const json = await response.json()
        const embeddings = request.parse(json)
        const usage = (json as { usage?: { total_tokens?: number } }).usage
        const totalTokens =
          usage?.total_tokens ??
          // Gemini does not return usage.total_tokens — estimate with the provider's tokenizer
          inputs.reduce(
            (sum, text) => sum + estimateTokenCount(text, provider.tokenizerProvider).count,
            0
          )

        return { embeddings, totalTokens }
      } finally {
        // Runs on success, HTTP error, parse failure and abort alike, so the
        // timer never outlives the attempt.
        clearTimeout(timeout)
      }
    },
    {
      maxRetries: 3,
      initialDelayMs: 1000,
      maxDelayMs: 10000,
      retryCondition: (error: unknown) => {
        if (error instanceof EmbeddingAPIError) {
          // Only rate limiting and server-side failures are worth retrying.
          return error.status === 429 || error.status >= 500
        }
        return isRetryableError(error)
      },
    }
  )
}
| 312 | |
| 313 | function splitByItemLimit<T>(items: T[], limit: number): T[][] { |
| 314 | if (items.length <= limit) return [items] |
no test coverage detected