* Proxy a single request through the x402 payment flow to the BlockRun API. * * Optimizations applied in order: * 1. Dedup check — if the same request body was seen within 30s, replay the cached response * 2. Streaming heartbeat — for stream:true, send 200 + heartbeats immediately * 3. Smart routing — when m
( req: IncomingMessage, res: ServerResponse, apiBase: string, payFetch: (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>, options: ProxyOptions, routerOpts: RouterOptions, deduplicator: RequestDeduplicator, balanceMonitor: AnyBalanceMonitor, sessionStore: SessionStore, responseCache: ResponseCache, sessionJournal: SessionJournal, )
| 3496 | * 4. Fallback chain — on provider errors, try next model in tier's fallback list |
| 3497 | */ |
| 3498 | async function proxyRequest( |
| 3499 | req: IncomingMessage, |
| 3500 | res: ServerResponse, |
| 3501 | apiBase: string, |
| 3502 | payFetch: (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>, |
| 3503 | options: ProxyOptions, |
| 3504 | routerOpts: RouterOptions, |
| 3505 | deduplicator: RequestDeduplicator, |
| 3506 | balanceMonitor: AnyBalanceMonitor, |
| 3507 | sessionStore: SessionStore, |
| 3508 | responseCache: ResponseCache, |
| 3509 | sessionJournal: SessionJournal, |
| 3510 | ): Promise<void> { |
| 3511 | const startTime = Date.now(); |
| 3512 | |
| 3513 | // Build upstream URL: /v1/chat/completions → https://blockrun.ai/api/v1/chat/completions |
| 3514 | const upstreamUrl = `${apiBase}${req.url}`; |
| 3515 | |
| 3516 | // Collect request body |
| 3517 | const bodyChunks: Buffer[] = []; |
| 3518 | for await (const chunk of req) { |
| 3519 | bodyChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); |
| 3520 | } |
| 3521 | let body = Buffer.concat(bodyChunks); |
| 3522 | |
| 3523 | // Track original context size for response headers |
| 3524 | const originalContextSizeKB = Math.ceil(body.length / 1024); |
| 3525 | |
| 3526 | // Routing debug info is on by default; disable per-request with |
| 3527 | // x-clawrouter-debug: false, or globally with CLAWROUTER_DEBUG_HEADERS=off |
| 3528 | const debugMode = debugHeadersEnabledFromEnv() && req.headers["x-clawrouter-debug"] !== "false"; |
| 3529 | |
| 3530 | // --- Smart routing --- |
| 3531 | let routingDecision: RoutingDecision | undefined; |
| 3532 | let hasTools = false; // true when request includes a tools schema |
| 3533 | let hasVision = false; // true when request includes image_url content parts |
| 3534 | let isStreaming = false; |
| 3535 | let modelId = ""; |
| 3536 | let maxTokens = 4096; |
| 3537 | let routingProfile: "eco" | "auto" | "premium" | null = null; |
| 3538 | let stickyExplicitModel: string | undefined; |
| 3539 | let balanceFallbackNotice: string | undefined; |
| 3540 | let budgetDowngradeNotice: string | undefined; |
| 3541 | let budgetDowngradeHeaderMode: "downgraded" | undefined; |
| 3542 | let accumulatedContent = ""; // For session journal event extraction |
| 3543 | let responseInputTokens: number | undefined; |
| 3544 | let responseOutputTokens: number | undefined; |
| 3545 | let requestHadError = false; // Set to true when all models fail → used in logUsage |
| 3546 | let requestSummaryForStore = ""; // Truncated user prompt — captured for response-store entry |
| 3547 | const isChatCompletion = req.url?.includes("/chat/completions"); |
| 3548 | |
| 3549 | // Extract session ID early for journal operations (header-only at this point) |
| 3550 | const sessionId = getSessionId(req.headers as Record<string, string | string[] | undefined>); |
| 3551 | // Full session ID (header + content-derived) — populated once messages are parsed |
| 3552 | let effectiveSessionId: string | undefined = sessionId; |
| 3553 | |
| 3554 | if (isChatCompletion && body.length > 0) { |
| 3555 | try { |
no test coverage detected