MCPcopy
hub / github.com/mudler/LocalAI / PredictStream

Method PredictStream

backend/python/tinygrad/backend.py:564–607  ·  view source on GitHub ↗
(self, request, context)

Source from the content-addressed store, hash-verified

562 return backend_pb2.Reply()
563
async def PredictStream(self, request, context):
    """Stream generated text for ``request`` as a series of ``Reply`` messages.

    Each generated piece of text is yielded immediately, both as UTF-8
    bytes in ``message`` and as a ``ChatDelta`` carrying ``content``.
    Once generation finishes, the full text is handed to
    ``self.tool_parser``; if it yields tool calls or reasoning, one
    final ``Reply`` carrying them is emitted.

    Args:
        request: Prediction request. ``Tokens`` and ``Temperature`` are
            read here; values that are not greater than zero fall back
            to 256 and 0.7 respectively.
        context: gRPC servicer context used to report failures.

    Yields:
        ``backend_pb2.Reply`` messages as described above.

    Failure reporting:
        No exception propagates to the caller. If no LLM is loaded the
        status is set to ``FAILED_PRECONDITION`` and nothing is yielded.
        Any exception raised while rendering, generating or parsing is
        printed and reported as ``INTERNAL``.
    """
    if self.llm_model is None:
        context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
        context.set_details("LLM not loaded")
        return

    try:
        rendered = self._render_prompt(request)

        # Non-positive (or unset) request values fall back to defaults.
        token_budget = request.Tokens
        if not (token_budget > 0):
            token_budget = 256
        sampling_temp = request.Temperature
        if not (sampling_temp > 0):
            sampling_temp = 0.7

        # Keep every streamed piece so the complete text can be parsed
        # for tool calls after generation ends.
        pieces = []
        for _, piece in self._generate_tokens(rendered, token_budget, sampling_temp):
            pieces.append(piece)
            yield backend_pb2.Reply(
                message=piece.encode("utf-8"),
                chat_deltas=[backend_pb2.ChatDelta(content=piece)],
            )
        transcript = "".join(pieces)

        # Final emission carries the extracted tool calls (vLLM semantics).
        # Imported lazily, after streaming, exactly as before.
        from tool_parsers.hermes import HermesToolParser
        parser = self.tool_parser
        if not isinstance(parser, HermesToolParser):
            _, calls = parser.parse(transcript)
            reasoning = ""
        else:
            parsed = parser.parse_full(transcript)
            calls, reasoning = parsed.tool_calls, parsed.reasoning

        if calls or reasoning:
            call_deltas = [
                backend_pb2.ToolCallDelta(
                    index=c.index,
                    id=c.id,
                    name=c.name,
                    arguments=c.arguments,
                )
                for c in calls
            ]
            final_delta = backend_pb2.ChatDelta(
                reasoning_content=reasoning,
                tool_calls=call_deltas,
            )
            yield backend_pb2.Reply(chat_deltas=[final_delta])
    except Exception as exc:
        # Top-level RPC boundary: log the traceback and surface the
        # failure through the gRPC status instead of raising.
        import traceback
        traceback.print_exc()
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(f"PredictStream failed: {exc}")
608
609 async def Embedding(self, request, context):
610 if self.llm_model is None or self.llm_tokenizer is None:

Callers

nothing calls this directly

Calls 6

_render_promptMethod · 0.95
_generate_tokensMethod · 0.95
parse_fullMethod · 0.80
parseMethod · 0.65
set_codeMethod · 0.45
set_detailsMethod · 0.45

Tested by

no test coverage detected