execute attempts to create a stream and get a response using the primary model, falling back to configured fallback models if the primary fails. Retry behavior: - Retryable errors (5xx, timeouts): retry the same model with exponential backoff - Non-retryable errors (429, 4xx): skip to the next model.
( ctx context.Context, a *agent.Agent, primaryModel provider.Provider, messages []chat.Message, agentTools []tools.Tool, sess *session.Session, m *modelsdev.Model, events EventSink, )
| 224 | // |
| 225 | // Returns the stream result, the model that was used, and any error. |
| 226 | func (e *fallbackExecutor) execute( |
| 227 | ctx context.Context, |
| 228 | a *agent.Agent, |
| 229 | primaryModel provider.Provider, |
| 230 | messages []chat.Message, |
| 231 | agentTools []tools.Tool, |
| 232 | sess *session.Session, |
| 233 | m *modelsdev.Model, |
| 234 | events EventSink, |
| 235 | ) (streamResult, provider.Provider, error) { |
| 236 | fallbackModels := a.FallbackModels() |
| 237 | fallbackRetries := getEffectiveRetries(a) |
| 238 | modelChain := buildModelChain(primaryModel, fallbackModels) |
| 239 | startIndex := e.chainStartIndex(a, len(fallbackModels)) |
| 240 | |
| 241 | // One runtime.fallback span wraps the whole chain. Each per-model |
| 242 | // CreateChatCompletionStream call below opens its own `chat {model}` |
| 243 | // CLIENT child span via the provider decorator, so the fallback span |
| 244 | // is a useful aggregate boundary (total attempts, final model, |
| 245 | // terminal outcome) without duplicating per-model timing data. |
| 246 | ctx, fbSpan := genai.StartFallback(ctx, a.Name(), primaryModel.ID().Model, startIndex > 0) |
| 247 | defer fbSpan.End() |
| 248 | |
| 249 | var lastErr error |
| 250 | primaryFailedWithNonRetryable := false |
| 251 | hasFallbacks := len(fallbackModels) > 0 |
| 252 | |
| 253 | for chainIdx := startIndex; chainIdx < len(modelChain); chainIdx++ { |
| 254 | modelEntry := modelChain[chainIdx] |
| 255 | |
| 256 | // Each model in the chain gets (1 + retries) attempts for retryable errors. |
| 257 | // Non-retryable errors (429 with fallbacks, 4xx) skip immediately to the next model. |
| 258 | // 429 without fallbacks is retried directly on the same model. |
| 259 | maxAttempts := 1 + fallbackRetries |
| 260 | |
| 261 | for attempt := range maxAttempts { |
| 262 | // Check context before each attempt |
| 263 | if ctx.Err() != nil { |
| 264 | fbSpan.SetOutcome(genai.FallbackOutcomeContextCanceled) |
| 265 | return streamResult{}, nil, ctx.Err() |
| 266 | } |
| 267 | fbSpan.IncrementAttempt() |
| 268 | |
| 269 | // Apply backoff before retry (not on first attempt of each model) |
| 270 | if attempt > 0 { |
| 271 | backoffDelay := backoff.Calculate(attempt - 1) |
| 272 | logRetryBackoff(a.Name(), modelEntry.provider.ID(), attempt, backoffDelay) |
| 273 | if !backoff.SleepWithContext(ctx, backoffDelay) { |
| 274 | fbSpan.SetOutcome(genai.FallbackOutcomeContextCanceled) |
| 275 | return streamResult{}, nil, ctx.Err() |
| 276 | } |
| 277 | } |
| 278 | |
| 279 | // Emit fallback event when transitioning to a new model (but not when starting in cooldown) |
| 280 | if chainIdx > startIndex && attempt == 0 { |
| 281 | logFallbackAttempt(a.Name(), modelEntry, attempt, fallbackRetries, lastErr) |
| 282 | prevModelID := modelChain[chainIdx-1].provider.ID() |
| 283 | reason := "" |
no test coverage detected