MCPcopy
hub / github.com/mudler/LocalAI / test_prefix_cache_reuse

Method test_prefix_cache_reuse

backend/python/mlx/test.py:211–240  ·  view source on GitHub ↗

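This method sends two prompts that share a common prefix and checks that both requests return a message; it is intended to exercise reuse of cached KV states for the shared prefix, but does not directly assert that a cache hit occurred.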

(self)

Source from the content-addressed store, hash-verified

209 self.tearDown()
210
def test_prefix_cache_reuse(self):
    """
    Send two prompts that share a common prefix and check both are answered.

    The test loads the model over gRPC, runs a prediction for a base
    prompt, then runs a second prediction for a prompt that extends the
    base one. It is intended to exercise reuse of cached KV states for the
    shared prefix; note that a cache hit itself is not asserted here, only
    that both requests return a message.

    self.setUp() and self.tearDown() are invoked explicitly around the
    test body.
    """
    try:
        self.setUp()
        with grpc.insecure_channel("localhost:50051") as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.LoadModel(
                backend_pb2.ModelOptions(Model="mlx-community/Llama-3.2-1B-Instruct-4bit")
            )
            self.assertTrue(response.success)

            # First request with base prompt
            prompt_base = "Once upon a time in a land far away, "
            req1 = backend_pb2.PredictOptions(Prompt=prompt_base, Tokens=10)
            resp1 = stub.Predict(req1)
            # NOTE(review): if `message` is a protobuf scalar field it is
            # presumably never None, making this check vacuous - confirm.
            self.assertIsNotNone(resp1.message)

            # Second request with extended prompt (same prefix)
            prompt_extended = prompt_base + "there lived a brave knight who "
            req2 = backend_pb2.PredictOptions(Prompt=prompt_extended, Tokens=10)
            resp2 = stub.Predict(req2)
            self.assertIsNotNone(resp2.message)

            print(f"Prefix cache test passed: base={len(resp1.message)} bytes, extended={len(resp2.message)} bytes")

    except AssertionError:
        # Let assertion failures surface with their own message and
        # traceback instead of replacing them with a generic failure.
        raise
    except Exception as err:
        print(err)
        # Carry the underlying error into the test report, not just stdout.
        self.fail(f"Prefix cache reuse test failed: {err}")
    finally:
        self.tearDown()
241
242
243 def test_tokenize_string(self):

Callers

nothing calls this directly

Calls 4

setUpMethod · 0.95
tearDownMethod · 0.95
LoadModelMethod · 0.65
PredictMethod · 0.65

Tested by

no test coverage detected