(err: any)
| 4 | // without forming an import cycle (fusion ↔ proxy in particular). |
| 5 | |
| 6 | export function isRetryableError(err: any): boolean { |
| 7 | const msg = (err.message ?? '').toLowerCase(); |
| 8 | // Trust the upstream HTTP status the provider attached to the error first |
| 9 | // (providerHttpError in providers/base.ts sets err.status on every adapter). |
| 10 | // This structured check is the robust primary signal; the message-substring |
| 11 | // rules below are the fallback for errors that carry a code in their text but |
| 12 | // no numeric status. It's the fix for #337/#339: an Ollama "410 Gone", or any |
| 13 | // upstream 5xx the substring allowlist never enumerated (502/504/507…), used to |
| 14 | // fall through to a 502 and STRAND the healthy paid routes still queued later in |
| 15 | // the chain — because the old code matched specific substrings and ignored |
| 16 | // err.status for every code except 403. 408 (request timeout), 409 (conflict), |
| 17 | // 410 (model pulled upstream), 429 (rate limit) and all 5xx are transient or |
| 18 | // fail-over-able; a 400/401 status is not retried by this check (it matches no rule; |
| 19 | // status is 0 only when err.status is not a number) and 403 is handled by isModelAccessForbiddenError below. |
| 20 | const status = typeof err?.status === 'number' ? err.status : 0; |
| 21 | if (status === 408 || status === 409 || status === 410 || status === 429 || status >= 500) return true; |
| 22 | return msg.includes('429') || msg.includes('rate limit') || msg.includes('too many requests') |
| 23 | || msg.includes('quota') || msg.includes('resource_exhausted') |
| 24 | || msg.includes('aborted') || msg.includes('timeout') || msg.includes('etimedout') |
| 25 | || msg.includes('econnrefused') || msg.includes('econnreset') |
| 26 | || msg.includes('fetch failed') // undici transport error (proxy down, DNS, TLS, etc.) |
| 27 | || msg.includes('503') || msg.includes('unavailable') |
| 28 | || msg.includes('500') || msg.includes('internal server error') |
| 29 | // 413: this model's payload limit is too small for the request, but another |
| 30 | // provider in the fallback chain may have a larger limit. Same reasoning as 503. |
| 31 | || msg.includes('413') || msg.includes('payload too large') || msg.includes('request body too large') |
| 32 | || msg.includes('request entity too large') || msg.includes('content too large') |
| 33 | // 404: model deprecated/removed upstream (e.g. OpenRouter's "no endpoints found" |
| 34 | // for a model that's been pulled). Rotate to the next model in the chain — |
| 35 | // setCooldown + the health checker will avoid this model on subsequent requests. |
| 36 | || msg.includes('404') || msg.includes('not found') || msg.includes('no endpoints found') |
| 37 | // 410: the model/endpoint was permanently removed upstream (e.g. Ollama Cloud |
| 38 | // "API error 410: Gone", #339). Like a 404 it won't return on this provider, so |
| 39 | // rotate to the next route; isModelNotFoundError benches the whole model. The |
| 40 | // structured status check above already catches the 410 when the provider |
| 41 | // attaches err.status — this is the text fallback for errors that don't. |
| 42 | || msg.includes('410') || msg.includes('gone') |
| 43 | // 403: the key is valid (it passed validateKey, and the health checker |
| 44 | // disables truly-forbidden keys) but this specific model is off-limits to |
| 45 | // the key's tier — e.g. gpt-4o on GitHub Models' free tier, subscription-only |
| 46 | // models on Cloudflare. Another model in the chain is reachable, so fail over |
| 47 | // instead of 502-ing the whole request. Paired with isModelAccessForbiddenError |
| 48 | // to rule the model out for this request and a day-long bench. See issue #256. |
| 49 | || isModelAccessForbiddenError(err) |
| 50 | // 400: one provider may reject parameters another accepts (e.g. max_tokens |
| 51 | // limits, unsupported params). The matching pattern is "api error 400" |
| 52 | // which comes from the OpenAI-compat provider's error formatting, not |
| 53 | // a bare "400" which is deliberately non-retryable for validation errors. |
| 54 | || msg.includes('api error 400') |
| 55 | // 402: this provider/key is out of credits (e.g. HuggingFace Router |
| 56 | // "API error 402: Payment required"). The SAME model often lives on another |
| 57 | // provider (Kimi K2.6 is on HF + Cloudflare + NVIDIA), so fail over instead |
| 58 | // of killing the workflow. Paired with a long cooldown (isPaymentRequiredError) |
| 59 | // so we don't re-hammer the broke key every retry. |
| 60 | || isPaymentRequiredError(err) |
| 61 | // Dead-turn classes from the stream turn-integrity layer (#231 audit): |
| 62 | // all thrown before any byte reached the client, so another model can |
| 63 | // serve the request invisibly. |
no test coverage detected