hub / github.com/mudler/LocalAI / _finalize_output

Method _finalize_output

backend/python/mlx-distributed/backend.py:489–533 · view source on GitHub ↗

(self, request, generated_text, last_response)

Source from the content-addressed store, hash-verified

487	return text[:earliest] if earliest < len(text) else text
488
def _finalize_output(self, request, generated_text, last_response):
    """Turn raw generated text into the pieces returned to the caller.

    Steps, in order:

    1. If the tokenizer reports ``has_thinking``, split the text into
       reasoning and visible content via ``split_reasoning``.
    2. If the tokenizer reports ``has_tool_calling`` and a tool module is
       available, extract tool calls from the content via
       ``parse_tool_calls`` (which also returns the remaining content).
    3. Read prompt/completion token counts from ``last_response``.
    4. If ``request.Logprobs`` is positive, serialize the logprob of the
       token carried by ``last_response`` as UTF-8 JSON.

    Args:
        request: Request object; ``Tools`` (JSON text) and ``Logprobs``
            are read from it.
        generated_text: Full text produced by generation.
        last_response: Final generation response, or ``None``. The
            attributes ``prompt_tokens``, ``generation_tokens``, ``token``
            and ``logprobs`` are read when present.

    Returns:
        A 6-tuple ``(content, reasoning_content, tool_calls_proto,
        prompt_token_count, completion_token_count, logprobs_bytes)``.
        ``logprobs_bytes`` is ``b""`` when logprobs were not requested,
        are unavailable, or could not be extracted.
    """
    content = generated_text
    reasoning_content = ""
    if getattr(self.tokenizer, "has_thinking", False):
        # Missing or None markers are normalized to empty strings.
        think_start = getattr(self.tokenizer, "think_start", "") or ""
        think_end = getattr(self.tokenizer, "think_end", "") or ""
        reasoning_content, content = split_reasoning(content, think_start, think_end)

    tool_calls_proto: List[backend_pb2.ToolCallDelta] = []
    tool_module = None
    if getattr(self.tokenizer, "has_tool_calling", False):
        tool_module = self._tool_module_from_tokenizer()
    if tool_module is not None:
        parsed_tools = None
        if request.Tools:
            try:
                parsed_tools = json.loads(request.Tools)
            except json.JSONDecodeError:
                # Best effort: an unparsable tool list is treated as absent.
                parsed_tools = None
        calls, content = parse_tool_calls(content, tool_module, parsed_tools)
        tool_calls_proto = [
            backend_pb2.ToolCallDelta(
                index=c["index"], id=c["id"], name=c["name"], arguments=c["arguments"],
            )
            for c in calls
        ]

    # Missing or None counters are reported as 0.
    prompt_token_count = int(getattr(last_response, "prompt_tokens", 0) or 0) if last_response else 0
    completion_token_count = int(getattr(last_response, "generation_tokens", 0) or 0) if last_response else 0

    logprobs_bytes = b""
    if last_response is not None and int(getattr(request, "Logprobs", 0) or 0) > 0:
        try:
            lp = getattr(last_response, "logprobs", None)
            raw_token = getattr(last_response, "token", None)
            # Distinguish "no token" (None) from token id 0: id 0 is a real
            # vocabulary entry and must be decoded like any other, while a
            # missing token must not be reported as if it were id 0.
            if lp is not None and raw_token is not None:
                token_id = int(raw_token)
                token_text = self.tokenizer.decode([token_id])
                top_logprob = float(lp[token_id]) if hasattr(lp, "__getitem__") else 0.0
                logprobs_bytes = json.dumps(
                    {"content": [{"token": token_text, "logprob": top_logprob}]}
                ).encode("utf-8")
        except Exception as e:
            # Logprobs are optional output; never fail the whole response.
            print(f"[Rank 0] Logprobs extraction failed: {e}", file=sys.stderr)

    return content, reasoning_content, tool_calls_proto, prompt_token_count, completion_token_count, logprobs_bytes
534
535	def _build_generation_params(self, request, default_max_tokens=200):
536	import mlx.core as mx

Callers 2

PredictMethod · 0.95

PredictStreamMethod · 0.95

Calls 5

_tool_module_from_tokenizerMethod · 0.95

split_reasoningFunction · 0.90

parse_tool_callsFunction · 0.90

appendMethod · 0.80

decodeMethod · 0.80

Tested by

no test coverage detected