MCPcopy Index your code
hub / github.com/mudler/LocalAI / LoadModel

Method LoadModel

backend/python/vllm/backend.py:189–292  ·  view source on GitHub ↗

Loads a language model. Args: `request` — the load model request; `context` — the gRPC context. Returns: `backend_pb2.Result` — the load model result.

(self, request, context)

Source from the content-addressed store, hash-verified

187 return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
188
189 async def LoadModel(self, request, context):
190 """
191 Loads a language model.
192
193 Args:
194 request: The load model request.
195 context: The gRPC context.
196
197 Returns:
198 backend_pb2.Result: The load model result.
199 """
200 engine_args = AsyncEngineArgs(
201 model=request.Model,
202 )
203
204 if request.Quantization != "":
205 engine_args.quantization = request.Quantization
206 if request.LoadFormat != "":
207 engine_args.load_format = request.LoadFormat
208 if request.GPUMemoryUtilization != 0:
209 engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
210 if request.TrustRemoteCode:
211 engine_args.trust_remote_code = request.TrustRemoteCode
212 if request.EnforceEager:
213 engine_args.enforce_eager = request.EnforceEager
214 if request.TensorParallelSize:
215 engine_args.tensor_parallel_size = request.TensorParallelSize
216 if request.SwapSpace != 0:
217 engine_args.swap_space = request.SwapSpace
218 if request.MaxModelLen != 0:
219 engine_args.max_model_len = request.MaxModelLen
220 if request.DisableLogStatus:
221 engine_args.disable_log_status = request.DisableLogStatus
222 if request.DType != "":
223 engine_args.dtype = request.DType
224 if request.LimitImagePerPrompt != 0 or request.LimitVideoPerPrompt != 0 or request.LimitAudioPerPrompt != 0:
225 # limit-mm-per-prompt defaults to 1 per modality, based on vLLM docs
226 engine_args.limit_mm_per_prompt = {
227 "image": max(request.LimitImagePerPrompt, 1),
228 "video": max(request.LimitVideoPerPrompt, 1),
229 "audio": max(request.LimitAudioPerPrompt, 1)
230 }
231
232 # engine_args from YAML overrides typed fields above so operators can
233 # tune anything the AsyncEngineArgs dataclass exposes without waiting
234 # on protobuf changes.
235 try:
236 engine_args = self._apply_engine_args(engine_args, request.EngineArgs)
237 except ValueError as err:
238 print(f"engine_args error: {err}", file=sys.stderr)
239 return backend_pb2.Result(success=False, message=str(err))
240
241 try:
242 self.llm = AsyncLLMEngine.from_engine_args(engine_args)
243 except Exception as err:
244 print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
245 return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
246

Callers

nothing calls this directly

Calls 3

_apply_engine_args · Method · 0.95
_parse_options · Method · 0.95
get · Method · 0.45

Tested by

no test coverage detected