MCPcopy
hub / github.com/mudler/LocalAI / _finalize_output

Method _finalize_output

backend/python/mlx-distributed/backend.py:489–533  ·  view source on GitHub ↗
(self, request, generated_text, last_response)

Source from the content-addressed store, hash-verified

487 return text[:earliest] if earliest < len(text) else text
488
489 def _finalize_output(self, request, generated_text, last_response):
490 content = generated_text
491 reasoning_content = ""
492 if getattr(self.tokenizer, "has_thinking", False):
493 think_start = getattr(self.tokenizer, "think_start", "") or ""
494 think_end = getattr(self.tokenizer, "think_end", "") or ""
495 reasoning_content, content = split_reasoning(content, think_start, think_end)
496
497 tool_calls_proto: List[backend_pb2.ToolCallDelta] = []
498 tool_module = None
499 if getattr(self.tokenizer, "has_tool_calling", False):
500 tool_module = self._tool_module_from_tokenizer()
501 if tool_module is not None:
502 parsed_tools = None
503 if request.Tools:
504 try:
505 parsed_tools = json.loads(request.Tools)
506 except json.JSONDecodeError:
507 parsed_tools = None
508 calls, content = parse_tool_calls(content, tool_module, parsed_tools)
509 for c in calls:
510 tool_calls_proto.append(
511 backend_pb2.ToolCallDelta(
512 index=c["index"], id=c["id"], name=c["name"], arguments=c["arguments"],
513 )
514 )
515
516 prompt_token_count = int(getattr(last_response, "prompt_tokens", 0) or 0) if last_response else 0
517 completion_token_count = int(getattr(last_response, "generation_tokens", 0) or 0) if last_response else 0
518
519 logprobs_bytes = b""
520 if last_response is not None and int(getattr(request, "Logprobs", 0) or 0) > 0:
521 try:
522 lp = getattr(last_response, "logprobs", None)
523 if lp is not None:
524 token_id = int(getattr(last_response, "token", 0) or 0)
525 token_text = self.tokenizer.decode([token_id]) if token_id else ""
526 top_logprob = float(lp[token_id]) if hasattr(lp, "__getitem__") else 0.0
527 logprobs_bytes = json.dumps(
528 {"content": [{"token": token_text, "logprob": top_logprob}]}
529 ).encode("utf-8")
530 except Exception as e:
531 print(f"[Rank 0] Logprobs extraction failed: {e}", file=sys.stderr)
532
533 return content, reasoning_content, tool_calls_proto, prompt_token_count, completion_token_count, logprobs_bytes
534
535 def _build_generation_params(self, request, default_max_tokens=200):
536 import mlx.core as mx

Callers 2

PredictMethod · 0.95
PredictStreamMethod · 0.95

Calls 5

split_reasoningFunction · 0.90
parse_tool_callsFunction · 0.90
appendMethod · 0.80
decodeMethod · 0.80

Tested by

no test coverage detected