hub / github.com/mudler/LocalAI / test_prefix_cache_reuse

Method test_prefix_cache_reuse

backend/python/mlx/test.py:211–240 · view source on GitHub ↗

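This method tests that prompts sharing a common prefix benefit from cached KV states: it loads the model, sends a base prompt and then an extended prompt that starts with the same prefix, and checks that each request returns a message.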

(self)

Source from the content-addressed store, hash-verified

209	self.tearDown()
210
def test_prefix_cache_reuse(self):
    """
    Exercise the backend with two prompts that share a common prefix.

    The model is loaded over gRPC, then Predict is called first with a base
    prompt and again with that prompt extended by more text. Each reply is
    checked for a message, and the reply sizes are printed.

    Any exception raised along the way (a failed assertion included) is
    printed and reported as one test failure; tearDown runs in every case.
    """
    # NOTE(review): only the presence of a message is asserted for each
    # reply; nothing here measures whether cached KV state was actually
    # reused for the second request -- confirm whether that is intended.
    try:
        self.setUp()
        with grpc.insecure_channel("localhost:50051") as conn:
            client = backend_pb2_grpc.BackendStub(conn)

            # Load the model before issuing any prediction.
            model_options = backend_pb2.ModelOptions(
                Model="mlx-community/Llama-3.2-1B-Instruct-4bit"
            )
            load_result = client.LoadModel(model_options)
            self.assertTrue(load_result.success)

            # Request 1: the shared prefix on its own.
            shared_prefix = "Once upon a time in a land far away, "
            base_reply = client.Predict(
                backend_pb2.PredictOptions(Prompt=shared_prefix, Tokens=10)
            )
            self.assertIsNotNone(base_reply.message)

            # Request 2: the same prefix followed by additional text.
            longer_prompt = shared_prefix + "there lived a brave knight who "
            extended_reply = client.Predict(
                backend_pb2.PredictOptions(Prompt=longer_prompt, Tokens=10)
            )
            self.assertIsNotNone(extended_reply.message)

            base_size = len(base_reply.message)
            extended_size = len(extended_reply.message)
            print(f"Prefix cache test passed: base={base_size} bytes, extended={extended_size} bytes")

    except Exception as exc:
        print(exc)
        self.fail("Prefix cache reuse test failed")
    finally:
        self.tearDown()
241
242
243	def test_tokenize_string(self):

nothing calls this directly

setUpMethod · 0.95

tearDownMethod · 0.95

LoadModelMethod · 0.65

PredictMethod · 0.65

no test coverage detected