hub / github.com/mudler/LocalAI / _finalize_output

Method _finalize_output

backend/python/mlx/backend.py:605–667 · view source on GitHub ↗

Build a ChatDelta + token counts + logprobs from accumulated output. Returns ``(content, reasoning_content, tool_calls_proto, prompt_token_count, completion_token_count, logprobs_bytes)``.

(self, request, generated_text, last_response)

Source from the content-addressed store, hash-verified

603	)
604
605	def _finalize_output(self, request, generated_text, last_response):
606	"""Build a ChatDelta + token counts + logprobs from accumulated output.
607
608	Returns ``(content, reasoning_content, tool_calls_proto,
609	prompt_token_count, completion_token_count, logprobs_bytes)``.
610	"""
611	content = generated_text
612	reasoning_content = ""
613
614	if getattr(self.tokenizer, "has_thinking", False):
615	think_start = getattr(self.tokenizer, "think_start", "") or ""
616	think_end = getattr(self.tokenizer, "think_end", "") or ""
617	reasoning_content, content = split_reasoning(content, think_start, think_end)
618
619	tool_calls_proto: List[backend_pb2.ToolCallDelta] = []
620	tool_module = None
621	if getattr(self.tokenizer, "has_tool_calling", False):
622	tool_module = self._tool_module_from_tokenizer()
623	if tool_module is not None:
624	parsed_tools = None
625	if request.Tools:
626	try:
627	parsed_tools = json.loads(request.Tools)
628	except json.JSONDecodeError:
629	parsed_tools = None
630	calls, content = parse_tool_calls(content, tool_module, parsed_tools)
631	for c in calls:
632	tool_calls_proto.append(
633	backend_pb2.ToolCallDelta(
634	index=c["index"],
635	id=c["id"],
636	name=c["name"],
637	arguments=c["arguments"],
638	)
639	)
640
641	prompt_token_count = int(getattr(last_response, "prompt_tokens", 0) or 0) if last_response else 0
642	completion_token_count = int(getattr(last_response, "generation_tokens", 0) or 0) if last_response else 0
643
644	logprobs_bytes = b""
645	# Logprobs extraction — only when the request asked for them.
646	if last_response is not None and int(getattr(request, "Logprobs", 0) or 0) > 0:
647	try:
648	lp = getattr(last_response, "logprobs", None)
649	if lp is not None:
650	# GenerationResponse.logprobs on the last chunk is the
651	# logprob distribution of the final token. Without a
652	# per-token history we at minimum surface the last token's
653	# top-1 logprob so clients get a non-empty field.
654	token_id = int(getattr(last_response, "token", 0) or 0)
655	token_text = self.tokenizer.decode([token_id]) if token_id else ""
656	top_logprob = float(lp[token_id]) if hasattr(lp, "__getitem__") else 0.0
657	logprobs_bytes = json.dumps(
658	{
659	"content": [
660	{"token": token_text, "logprob": top_logprob}
661	]
662	}

Callers 2

Predict · Method · 0.95

PredictStream · Method · 0.95

Calls 5

_tool_module_from_tokenizer · Method · 0.95

split_reasoning · Function · 0.90

parse_tool_calls · Function · 0.90

append · Method · 0.80

decode · Method · 0.80

Tested by

no test coverage detected