* Attempt a refusal-fallback model swap in place of a terminal model_refusal. * * This runs entirely inside the active stream loop, BEFORE any error event or * partial error state is committed, so TaskService/waiters never observe an * intermediate refusal while the chain is being attempted.
(
workspaceId: WorkspaceId,
streamInfo: WorkspaceStreamInfo,
refusalFinishReason: string,
options?: { preserveParts?: boolean }
)
| 2147 | * fallback model — which intentionally fails instead of skipping ahead). |
| 2148 | */ |
| 2149 | private async tryModelFallbackAfterRefusal( |
| 2150 | workspaceId: WorkspaceId, |
| 2151 | streamInfo: WorkspaceStreamInfo, |
| 2152 | refusalFinishReason: string, |
| 2153 | options?: { preserveParts?: boolean } |
| 2154 | ): Promise<{ kind: "swapped" } | { kind: "terminal"; terminalNote?: string }> { |
| 2155 | const fallbackState = streamInfo.modelFallback; |
| 2156 | const preserveParts = options?.preserveParts === true; |
| 2157 | const refusedModel = normalizeToCanonical(streamInfo.model); |
| 2158 | |
| 2159 | if (!fallbackState) { |
| 2160 | await this.recordTerminalRefusalUsage(workspaceId, streamInfo, refusedModel); |
| 2161 | return { kind: "terminal" }; |
| 2162 | } |
| 2163 | |
| 2164 | fallbackState.refusedModels.push(refusedModel); |
| 2165 | streamInfo.initialMetadata = { |
| 2166 | ...streamInfo.initialMetadata, |
| 2167 | modelFallback: { |
| 2168 | requestedModel: fallbackState.requestedModel, |
| 2169 | refusedModels: [...fallbackState.refusedModels], |
| 2170 | }, |
| 2171 | }; |
| 2172 | |
| 2173 | if (streamInfo.abortController.signal.aborted || streamInfo.softInterrupt.pending) { |
| 2174 | await this.recordRefusedAttemptUsage(workspaceId, streamInfo, refusedModel); |
| 2175 | return { kind: "terminal" }; |
| 2176 | } |
| 2177 | |
| 2178 | // Attribute this refused attempt's usage to the refusing model for EVERY |
| 2179 | // chain outcome (swap, exhaustion, unstartable fallback) before any state |
| 2180 | // reset. Chains that end in a terminal failure must not drop the final |
| 2181 | // hop's tokens from session usage / cost accounting. |
| 2182 | await this.recordRefusedAttemptUsage(workspaceId, streamInfo, refusedModel); |
| 2183 | |
| 2184 | const nextModelString = fallbackState.options.chain[fallbackState.refusedModels.length - 1]; |
| 2185 | if (nextModelString === undefined) { |
| 2186 | return { |
| 2187 | kind: "terminal", |
| 2188 | terminalNote: `Model fallback chain exhausted; refused models: ${fallbackState.refusedModels.join(", ")}.`, |
| 2189 | }; |
| 2190 | } |
| 2191 | |
| 2192 | if (preserveParts && hasIncompleteToolCallPart(streamInfo.parts)) { |
| 2193 | return { |
| 2194 | kind: "terminal", |
| 2195 | terminalNote: |
| 2196 | "Model fallback was skipped because the refused partial response had an incomplete tool call.", |
| 2197 | }; |
| 2198 | } |
| 2199 | |
| 2200 | const continuation = preserveParts |
| 2201 | ? this.buildPartialRefusalContinuationMessage(streamInfo, refusalFinishReason) |
| 2202 | : undefined; |
| 2203 | if (continuation != null && !continuation.success) { |
| 2204 | return { |
| 2205 | kind: "terminal", |
| 2206 | terminalNote: `Model fallback was skipped because ${continuation.error}.`, |
no test coverage detected