({
messages,
summaryRequest,
appState,
context,
preCompactTokenCount,
cacheSafeParams,
}: {
messages: Message[]
summaryRequest: UserMessage
appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
context: ToolUseContext
preCompactTokenCount: number
cacheSafeParams: CacheSafeParams
})
| 1168 | } |
| 1169 | |
| 1170 | async function streamCompactSummary({ |
| 1171 | messages, |
| 1172 | summaryRequest, |
| 1173 | appState, |
| 1174 | context, |
| 1175 | preCompactTokenCount, |
| 1176 | cacheSafeParams, |
| 1177 | }: { |
| 1178 | messages: Message[] |
| 1179 | summaryRequest: UserMessage |
| 1180 | appState: Awaited<ReturnType<ToolUseContext['getAppState']>> |
| 1181 | context: ToolUseContext |
| 1182 | preCompactTokenCount: number |
| 1183 | cacheSafeParams: CacheSafeParams |
| 1184 | }): Promise<AssistantMessage> { |
| 1185 | // When prompt cache sharing is enabled, use a forked agent to reuse the |
| 1186 | // main conversation's cached prefix (system prompt, tools, context messages). |
| 1187 | // Falls back to the regular streaming path on failure. |
| 1188 | // 3P default: true — see comment at the other tengu_compact_cache_prefix read above. |
| 1189 | const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE( |
| 1190 | 'tengu_compact_cache_prefix', |
| 1191 | true, |
| 1192 | ) |
| 1193 | // Send keep-alive signals during compaction to prevent remote session |
| 1194 | // WebSocket idle timeouts from dropping bridge connections. Compaction |
| 1195 | // API calls can take 5-10+ seconds, during which no other messages |
| 1196 | // flow through the transport — without keep-alives, the server may |
| 1197 | // close the WebSocket for inactivity. |
| 1198 | // Two signals: (1) PUT /worker heartbeat via sessionActivity, and |
| 1199 | // (2) re-emit 'compacting' status so the SDK event stream stays active |
| 1200 | // and the server doesn't consider the session stale. |
| 1201 | const activityInterval = isSessionActivityTrackingActive() |
| 1202 | ? setInterval( |
| 1203 | (statusSetter?: (status: 'compacting' | null) => void) => { |
| 1204 | sendSessionActivitySignal() |
| 1205 | statusSetter?.('compacting') |
| 1206 | }, |
| 1207 | 30_000, |
| 1208 | context.setSDKStatus, |
| 1209 | ) |
| 1210 | : undefined |
| 1211 | |
| 1212 | try { |
| 1213 | if (promptCacheSharingEnabled) { |
| 1214 | try { |
| 1215 | // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's |
| 1216 | // prompt cache by sending identical cache-key params (system, tools, model, |
| 1217 | // messages prefix, thinking config). Setting maxOutputTokens would clamp |
| 1218 | // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts, |
| 1219 | // creating a thinking config mismatch that invalidates the cache. |
| 1220 | // The streaming fallback path (below) can safely set maxOutputTokensOverride |
| 1221 | // since it doesn't share cache with the main thread. |
| 1222 | const result = await runForkedAgent({ |
| 1223 | promptMessages: [summaryRequest], |
| 1224 | cacheSafeParams, |
| 1225 | canUseTool: createCompactCanUseTool(), |
| 1226 | querySource: 'compact', |
| 1227 | forkLabel: 'compact', |
no test coverage detected