( messages: (UserMessage | AssistantMessage)[], enablePromptCaching: boolean, querySource?: QuerySource, useCachedMC = false, newCacheEdits?: CachedMCEditsBlock | null, pinnedEdits?: CachedMCPinnedEdits[], skipCacheWrite = false, )
| 3061 | |
| 3062 | // Exported for testing cache_reference placement constraints |
| 3063 | export function addCacheBreakpoints( |
| 3064 | messages: (UserMessage | AssistantMessage)[], |
| 3065 | enablePromptCaching: boolean, |
| 3066 | querySource?: QuerySource, |
| 3067 | useCachedMC = false, |
| 3068 | newCacheEdits?: CachedMCEditsBlock | null, |
| 3069 | pinnedEdits?: CachedMCPinnedEdits[], |
| 3070 | skipCacheWrite = false, |
| 3071 | ): MessageParam[] { |
| 3072 | logEvent('tengu_api_cache_breakpoints', { |
| 3073 | totalMessageCount: messages.length, |
| 3074 | cachingEnabled: enablePromptCaching, |
| 3075 | skipCacheWrite, |
| 3076 | }) |
| 3077 | |
| 3078 | // At most one message-level cache_control marker per request (none when skipCacheWrite is set with fewer than two messages, since markerIndex is then negative). Mycro's |
| 3079 | // turn-to-turn eviction (page_manager/index.rs: Index::insert) frees |
| 3080 | // local-attention KV pages at any cached prefix position NOT in |
| 3081 | // cache_store_int_token_boundaries. With two markers the second-to-last |
| 3082 | // position is protected and its locals survive an extra turn even though |
| 3083 | // nothing will ever resume from there — with one marker they're freed |
| 3084 | // immediately. For fire-and-forget forks (skipCacheWrite) we shift the |
| 3085 | // marker to the second-to-last message: that's the last shared-prefix |
| 3086 | // point, so the write is a no-op merge on mycro (entry already exists) |
| 3087 | // and the fork doesn't leave its own tail in the KVCC. Dense pages are |
| 3088 | // refcounted and survive via the new hash either way. |
| 3089 | const markerIndex = skipCacheWrite ? messages.length - 2 : messages.length - 1 |
| 3090 | const result = messages.map((msg, index) => { |
| 3091 | const addCache = index === markerIndex |
| 3092 | if (msg.type === 'user') { |
| 3093 | return userMessageToMessageParam( |
| 3094 | msg, |
| 3095 | addCache, |
| 3096 | enablePromptCaching, |
| 3097 | querySource, |
| 3098 | ) |
| 3099 | } |
| 3100 | return assistantMessageToMessageParam( |
| 3101 | msg, |
| 3102 | addCache, |
| 3103 | enablePromptCaching, |
| 3104 | querySource, |
| 3105 | ) |
| 3106 | }) |
| 3107 | |
| 3108 | if (!useCachedMC) { |
| 3109 | return result |
| 3110 | } |
| 3111 | |
| 3112 | // Track all cache_references being deleted to prevent duplicates across blocks. |
| 3113 | const seenDeleteRefs = new Set<string>() |
| 3114 | |
| 3115 | // Helper to deduplicate a cache_edits block against already-seen deletions |
| 3116 | const deduplicateEdits = (block: CachedMCEditsBlock): CachedMCEditsBlock => { |
| 3117 | const uniqueEdits = block.edits.filter(edit => { |
| 3118 | if (seenDeleteRefs.has(edit.cache_reference)) { |
| 3119 | return false |
| 3120 | } |
no test coverage detected