MCPcopy
hub / github.com/mudler/LocalAI / test_cache_reuse

Method test_cache_reuse

backend/python/mlx/test.py:179–209  ·  view source on GitHub ↗

This method tests that repeated prompts reuse cached KV states. The second request should benefit from the cached prompt processing.

(self)

Source from the content-addressed store, hash-verified

177 self.tearDown()
178
def test_cache_reuse(self):
    """
    Send the same prompt twice and check that both requests return a reply.

    The second request is expected to benefit from KV states cached while
    the first one was processed. Only the presence of both replies is
    asserted here; cache reuse itself is not measured.
    """
    try:
        # NOTE(review): if the enclosing class is a unittest.TestCase, the
        # runner already calls setUp()/tearDown() around each test, so the
        # explicit calls here would run them a second time - confirm.
        self.setUp()
        with grpc.insecure_channel("localhost:50051") as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Llama-3.2-1B-Instruct-4bit"))
            self.assertTrue(response.success)

            prompt = "The quick brown fox jumps over the lazy dog. "

            # First request - populates cache
            req1 = backend_pb2.PredictOptions(Prompt=prompt, Tokens=10)
            resp1 = stub.Predict(req1)
            # NOTE(review): presumably `message` is a protobuf scalar field,
            # which is never None, so this check may be vacuous - confirm.
            self.assertIsNotNone(resp1.message)

            # Second request with same prompt - should reuse cache
            req2 = backend_pb2.PredictOptions(Prompt=prompt, Tokens=10)
            resp2 = stub.Predict(req2)
            self.assertIsNotNone(resp2.message)

            print(f"Cache reuse test passed: first={len(resp1.message)} bytes, second={len(resp2.message)} bytes")

    except Exception as err:
        # Assertion failures raised above are caught here as well, so the
        # cause is carried into the failure message instead of being left
        # only on stdout.
        print(err)
        self.fail(f"Cache reuse test failed: {err}")
    finally:
        self.tearDown()
210
211 def test_prefix_cache_reuse(self):
212 """

Callers

nothing calls this directly

Calls 4

setUpMethod · 0.95
tearDownMethod · 0.95
LoadModelMethod · 0.65
PredictMethod · 0.65

Tested by

no test coverage detected