MCPcopy
hub / github.com/mudler/LocalAI / _finalize_output

Method _finalize_output

backend/python/mlx/backend.py:605–667  ·  view source on GitHub ↗

Build a ChatDelta + token counts + logprobs from accumulated output. Returns ``(content, reasoning_content, tool_calls_proto, prompt_token_count, completion_token_count, logprobs_bytes)``.

(self, request, generated_text, last_response)

Source from the content-addressed store, hash-verified

603 )
604
605 def _finalize_output(self, request, generated_text, last_response):
606 """Build a ChatDelta + token counts + logprobs from accumulated output.
607
608 Returns ``(content, reasoning_content, tool_calls_proto,
609 prompt_token_count, completion_token_count, logprobs_bytes)``.
610 """
611 content = generated_text
612 reasoning_content = ""
613
614 if getattr(self.tokenizer, "has_thinking", False):
615 think_start = getattr(self.tokenizer, "think_start", "") or ""
616 think_end = getattr(self.tokenizer, "think_end", "") or ""
617 reasoning_content, content = split_reasoning(content, think_start, think_end)
618
619 tool_calls_proto: List[backend_pb2.ToolCallDelta] = []
620 tool_module = None
621 if getattr(self.tokenizer, "has_tool_calling", False):
622 tool_module = self._tool_module_from_tokenizer()
623 if tool_module is not None:
624 parsed_tools = None
625 if request.Tools:
626 try:
627 parsed_tools = json.loads(request.Tools)
628 except json.JSONDecodeError:
629 parsed_tools = None
630 calls, content = parse_tool_calls(content, tool_module, parsed_tools)
631 for c in calls:
632 tool_calls_proto.append(
633 backend_pb2.ToolCallDelta(
634 index=c["index"],
635 id=c["id"],
636 name=c["name"],
637 arguments=c["arguments"],
638 )
639 )
640
641 prompt_token_count = int(getattr(last_response, "prompt_tokens", 0) or 0) if last_response else 0
642 completion_token_count = int(getattr(last_response, "generation_tokens", 0) or 0) if last_response else 0
643
644 logprobs_bytes = b""
645 # Logprobs extraction — only when the request asked for them.
646 if last_response is not None and int(getattr(request, "Logprobs", 0) or 0) > 0:
647 try:
648 lp = getattr(last_response, "logprobs", None)
649 if lp is not None:
650 # GenerationResponse.logprobs on the last chunk is the
651 # logprob distribution of the final token. Without a
652 # per-token history we at minimum surface the last token's
653 # top-1 logprob so clients get a non-empty field.
654 token_id = int(getattr(last_response, "token", 0) or 0)
655 token_text = self.tokenizer.decode([token_id]) if token_id else ""
656 top_logprob = float(lp[token_id]) if hasattr(lp, "__getitem__") else 0.0
657 logprobs_bytes = json.dumps(
658 {
659 "content": [
660 {"token": token_text, "logprob": top_logprob}
661 ]
662 }

Callers 2

Predict (Method) · 0.95
PredictStream (Method) · 0.95

Calls 5

split_reasoning (Function) · 0.90
parse_tool_calls (Function) · 0.90
append (Method) · 0.80
decode (Method) · 0.80

Tested by

no test coverage detected