MCPcopy
hub / github.com/mudler/LocalAI / LoadModel

Method LoadModel

backend/python/mlx/backend.py:54–126  ·  view source on GitHub ↗

Loads a language model using MLX. Args: `request` — the load model request; `context` — the gRPC context. Returns: `backend_pb2.Result` — the load model result.

(self, request, context)

Source from the content-addressed store, hash-verified

52 return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
53
54 async def LoadModel(self, request, context):
55 """
56 Loads a language model using MLX.
57
58 Args:
59 request: The load model request.
60 context: The gRPC context.
61
62 Returns:
63 backend_pb2.Result: The load model result.
64 """
65 try:
66 print(f"Loading MLX model: {request.Model}", file=sys.stderr)
67 print(f"Request: {request}", file=sys.stderr)
68
69 # Parse Options[] key:value strings into a typed dict (shared helper)
70 self.options = parse_options(request.Options)
71 print(f"Options: {self.options}", file=sys.stderr)
72
73 # Build tokenizer config for MLX using options
74 tokenizer_config = {}
75
76 # Handle trust_remote_code from request or options
77 if request.TrustRemoteCode or self.options.get("trust_remote_code", False):
78 tokenizer_config["trust_remote_code"] = True
79
80 # Handle EOS token from options
81 if "eos_token" in self.options:
82 tokenizer_config["eos_token"] = self.options["eos_token"]
83
84 # Handle other tokenizer config options
85 for key in ["pad_token", "bos_token", "unk_token", "sep_token", "cls_token", "mask_token"]:
86 if key in self.options:
87 tokenizer_config[key] = self.options[key]
88
89 # Load model and tokenizer using MLX
90 if tokenizer_config:
91 print(f"Loading with tokenizer_config: {tokenizer_config}", file=sys.stderr)
92 self.model, self.tokenizer = load(request.Model, tokenizer_config=tokenizer_config)
93 else:
94 self.model, self.tokenizer = load(request.Model)
95
96 # mlx_lm.load() returns a TokenizerWrapper that detects tool
97 # calling and thinking markers from the chat template / vocab.
98 # mlx-lm >= 0.30 also exposes a parser callable on the wrapper;
99 # earlier versions don't (we fall back to json.loads inside
100 # _tool_module_from_tokenizer below).
101 has_tools = bool(getattr(self.tokenizer, "has_tool_calling", False))
102 has_thinking = bool(getattr(self.tokenizer, "has_thinking", False))
103 tcs = getattr(self.tokenizer, "tool_call_start", None)
104 tce = getattr(self.tokenizer, "tool_call_end", None)
105 print(
106 f"MLX tokenizer capabilities: has_tool_calling={has_tools} "
107 f"has_thinking={has_thinking} tool_call_start={tcs!r} tool_call_end={tce!r}",
108 file=sys.stderr,
109 )
110
111 # Initialize thread-safe LRU prompt cache for efficient generation

Callers

nothing calls this directly

Calls 4

parse_options · Function · 0.90
load · Function · 0.50
get · Method · 0.45

Tested by

no test coverage detected