MCPcopy Index your code
hub / github.com/mudler/LocalAI / Predict

Method Predict

backend/python/tinygrad/backend.py:515–562  ·  view source on GitHub ↗
(self, request, context)

Source from the content-addressed store, hash-verified

513 return backend_pb2.Result(success=False, message=f"LoadModel failed: {exc}")
514
async def Predict(self, request, context):
    """Run one non-streaming generation and return it as a single Reply.

    The prompt is rendered from ``request``, every generated text piece is
    collected, and the joined output is passed through ``self.tool_parser``
    to separate plain content, tool calls and (Hermes parser only)
    reasoning text.

    Args:
        request: Prediction request. ``Tokens`` and ``Temperature`` are
            read here; non-positive values fall back to 256 and 0.7.
        context: gRPC servicer context, used to report error status.

    Returns:
        A ``backend_pb2.Reply`` carrying the UTF-8 encoded content, the
        number of generated pieces, the wall-clock generation time in
        seconds, and one ``ChatDelta``. An empty ``Reply`` is returned
        (with the status set on ``context``) when no model is loaded or
        when any step raises.
    """
    # Guard clause: nothing can be generated without a loaded model.
    if self.llm_model is None:
        context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
        context.set_details("LLM not loaded")
        return backend_pb2.Reply()

    try:
        prompt = self._render_prompt(request)

        # Non-positive request values mean "unset"; use the defaults.
        # NOTE(review): an explicit Temperature of 0 is also replaced by
        # 0.7 here - confirm that this is intended.
        max_new = 256
        if request.Tokens > 0:
            max_new = request.Tokens
        temperature = 0.7
        if request.Temperature > 0:
            temperature = request.Temperature

        # NOTE(review): this loop is synchronous inside an async method;
        # presumably it occupies the event loop until generation ends -
        # verify against how the server is run.
        started = time.monotonic()
        pieces = [
            text
            for _, text in self._generate_tokens(prompt, max_new, temperature)
        ]
        elapsed = time.monotonic() - started
        ntok = len(pieces)  # one yielded piece is counted as one token

        full = "".join(pieces)

        from tool_parsers.hermes import HermesToolParser
        if not isinstance(self.tool_parser, HermesToolParser):
            # Other parsers only yield content and tool calls.
            content, calls = self.tool_parser.parse(full)
            reasoning = ""
        else:
            parsed = self.tool_parser.parse_full(full)
            content = parsed.content
            calls = parsed.tool_calls
            reasoning = parsed.reasoning

        tool_call_deltas = []
        for call in calls:
            tool_call_deltas.append(
                backend_pb2.ToolCallDelta(
                    index=call.index,
                    id=call.id,
                    name=call.name,
                    arguments=call.arguments,
                )
            )

        delta = backend_pb2.ChatDelta(
            content=content,
            reasoning_content=reasoning,
            tool_calls=tool_call_deltas,
        )
        return backend_pb2.Reply(
            message=content.encode("utf-8"),
            tokens=ntok,
            timing_token_generation=elapsed,
            chat_deltas=[delta],
        )
    except Exception as exc:
        # RPC boundary: print the traceback, then report the failure via
        # the gRPC status rather than letting the exception propagate.
        import traceback
        traceback.print_exc()
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(f"Predict failed: {exc}")
        return backend_pb2.Reply()
563
564 async def PredictStream(self, request, context):
565 if self.llm_model is None:

Callers

nothing calls this directly

Calls 7

_render_promptMethod · 0.95
_generate_tokensMethod · 0.95
appendMethod · 0.80
parse_fullMethod · 0.80
parseMethod · 0.65
set_codeMethod · 0.45
set_detailsMethod · 0.45

Tested by

no test coverage detected