({
messages,
summaryRequest,
appState,
context,
preCompactTokenCount,
cacheSafeParams,
}: {
messages: Message[]
summaryRequest: UserMessage
appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
context: ToolUseContext
preCompactTokenCount: number
cacheSafeParams: CacheSafeParams
})
| 1134 | } |
| 1135 | |
| 1136 | async function streamCompactSummary({ |
| 1137 | messages, |
| 1138 | summaryRequest, |
| 1139 | appState, |
| 1140 | context, |
| 1141 | preCompactTokenCount, |
| 1142 | cacheSafeParams, |
| 1143 | }: { |
| 1144 | messages: Message[] |
| 1145 | summaryRequest: UserMessage |
| 1146 | appState: Awaited<ReturnType<ToolUseContext['getAppState']>> |
| 1147 | context: ToolUseContext |
| 1148 | preCompactTokenCount: number |
| 1149 | cacheSafeParams: CacheSafeParams |
| 1150 | }): Promise<AssistantMessage> { |
| 1151 | // When prompt cache sharing is enabled, use forked agent to reuse the |
| 1152 | // main conversation's cached prefix (system prompt, tools, context messages). |
| 1153 | // Falls back to regular streaming path on failure. |
| 1154 | // 3P default: true — see comment at the other tengu_compact_cache_prefix read above. |
| 1155 | const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE( |
| 1156 | 'tengu_compact_cache_prefix', |
| 1157 | true, |
| 1158 | ) |
| 1159 | // Send keep-alive signals during compaction to prevent remote session |
| 1160 | // WebSocket idle timeouts from dropping bridge connections. Compaction |
| 1161 | // API calls can take 5-10+ seconds, during which no other messages |
| 1162 | // flow through the transport — without keep-alives, the server may |
| 1163 | // close the WebSocket for inactivity. |
| 1164 | // Two signals: (1) PUT /worker heartbeat via sessionActivity, and |
| 1165 | // (2) re-emit 'compacting' status so the SDK event stream stays active |
| 1166 | // and the server doesn't consider the session stale. |
| 1167 | const activityInterval = isSessionActivityTrackingActive() |
| 1168 | ? setInterval( |
| 1169 | (statusSetter?: (status: 'compacting' | null) => void) => { |
| 1170 | sendSessionActivitySignal() |
| 1171 | statusSetter?.('compacting') |
| 1172 | }, |
| 1173 | 30_000, |
| 1174 | context.setSDKStatus, |
| 1175 | ) |
| 1176 | : undefined |
| 1177 | |
| 1178 | try { |
| 1179 | if (promptCacheSharingEnabled) { |
| 1180 | try { |
| 1181 | // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's |
| 1182 | // prompt cache by sending identical cache-key params (system, tools, model, |
| 1183 | // messages prefix, thinking config). Setting maxOutputTokens would clamp |
| 1184 | // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts, |
| 1185 | // creating a thinking config mismatch that invalidates the cache. |
| 1186 | // The streaming fallback path (below) can safely set maxOutputTokensOverride |
| 1187 | // since it doesn't share cache with the main thread. |
| 1188 | const result = await runForkedAgent({ |
| 1189 | promptMessages: [summaryRequest], |
| 1190 | cacheSafeParams, |
| 1191 | canUseTool: createCompactCanUseTool(), |
| 1192 | querySource: 'compact', |
| 1193 | forkLabel: 'compact', |
no test coverage detected