MCPcopy
hub / github.com/mudler/LocalAI / insert_cache

Method insert_cache

backend/python/mlx/mlx_cache.py:216–255  ·  view source on GitHub ↗

Insert a cache entry after generation completes. Thread-safe. Handles LRU eviction if max_size is exceeded. Args: model: Model identifier (used to namespace caches); tokens: The full token sequence (prompt + generated); prompt_cache: The KV cache to store.

(
        self, model, tokens: List[int], prompt_cache: List[Any]
    )

Source from the content-addressed store, hash-verified

214 return None, list(tokens)
215
def insert_cache(
    self, model, tokens: List[int], prompt_cache: List[Any]
) -> None:
    """
    Record the KV cache produced by a finished generation.

    The whole operation runs while holding ``self._lock``. Entries live in
    a per-model trie keyed token by token; the entry itself sits under the
    ``"cache"`` key of the node reached by the final token. ``self._lru``
    tracks ``(model, token-tuple)`` keys with the most recent at the right.

    Args:
        model: Model identifier; selects the per-model trie in
            ``self._cache``.
        tokens: The full token sequence (prompt + generated) that keys
            the entry.
        prompt_cache: The KV cache to store. Only used when no entry
            exists yet for this exact sequence; otherwise the existing
            entry is kept and its ``count`` is incremented.
    """
    with self._lock:
        sequence = tuple(tokens)
        lru_key = (model, sequence)

        # Descend the per-model trie, creating any missing nodes on the way.
        node = self._cache.setdefault(model, {})
        for token in sequence:
            node = node.setdefault(token, {})

        if "cache" not in node:
            # First time this exact sequence is stored.
            node["cache"] = CacheEntry(prompt_cache, 1)
        else:
            # Already stored: bump its count and drop the stale LRU slot
            # so the key can be re-added at the most-recent end below.
            node["cache"].count += 1
            self._lru.remove(lru_key)

        # Most recently inserted keys sit at the right end of the deque.
        self._lru.append(lru_key)

        # At most one key was added, so a single eviction from the
        # least-recent (left) end is enough to get back under max_size.
        if len(self._lru) > self.max_size:
            oldest_model, oldest_tokens = self._lru.popleft()
            self._delete(oldest_model, oldest_tokens)
256
257 def clear(self) -> None:
258 """Clear all cache entries. Thread-safe."""

Calls 4

_deleteMethod · 0.95
appendMethod · 0.80
CacheEntryClass · 0.70
removeMethod · 0.45