MCPcopy
hub / github.com/mudler/LocalAI / LoadModel

Method LoadModel

backend/python/vllm-omni/backend.py:153–272  ·  view source on GitHub ↗
(self, request, context)

Source from the content-addressed store, hash-verified

151 return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
152
153 def LoadModel(self, request, context):
154 try:
155 # CPU detection: if no CUDA, default vLLM target device to CPU.
156 try:
157 if not torch.cuda.is_available():
158 os.environ.setdefault("VLLM_TARGET_DEVICE", "cpu")
159 os.environ.setdefault("VLLM_CPU_KVCACHE_SPACE", "4")
160 except Exception:
161 pass
162
163 print(f"Loading model {request.Model}...", file=sys.stderr)
164 print(f"Request {request}", file=sys.stderr)
165
166 # Parse options from request.Options using shared helper
167 self.options = parse_options(request.Options)
168 opts = self.options
169
170 print(f"Options: {self.options}", file=sys.stderr)
171
172 # Detect model type
173 self.model_name = request.Model
174 self.model_type = request.Type if request.Type else self._detect_model_type(request.Model)
175 print(f"Detected model type: {self.model_type}", file=sys.stderr)
176
177 # Build DiffusionParallelConfig if diffusion model (image or video)
178 parallel_config = None
179 if self.model_type in ["image", "video"]:
180 parallel_config = DiffusionParallelConfig(
181 ulysses_degree=self.options.get("ulysses_degree", 1),
182 ring_degree=self.options.get("ring_degree", 1),
183 cfg_parallel_size=self.options.get("cfg_parallel_size", 1),
184 tensor_parallel_size=self.options.get("tensor_parallel_size", 1),
185 )
186
187 # Build cache_config dict if cache_backend specified
188 cache_backend = self.options.get("cache_backend") # "cache_dit" or "tea_cache"
189 cache_config = None
190 if cache_backend == "cache_dit":
191 cache_config = {
192 "Fn_compute_blocks": self.options.get("cache_dit_fn_compute_blocks", 1),
193 "Bn_compute_blocks": self.options.get("cache_dit_bn_compute_blocks", 0),
194 "max_warmup_steps": self.options.get("cache_dit_max_warmup_steps", 4),
195 "residual_diff_threshold": self.options.get("cache_dit_residual_diff_threshold", 0.24),
196 "max_continuous_cached_steps": self.options.get("cache_dit_max_continuous_cached_steps", 3),
197 "enable_taylorseer": self.options.get("cache_dit_enable_taylorseer", False),
198 "taylorseer_order": self.options.get("cache_dit_taylorseer_order", 1),
199 "scm_steps_mask_policy": self.options.get("cache_dit_scm_steps_mask_policy"),
200 "scm_steps_policy": self.options.get("cache_dit_scm_steps_policy", "dynamic"),
201 }
202 elif cache_backend == "tea_cache":
203 cache_config = {
204 "rel_l1_thresh": self.options.get("tea_cache_rel_l1_thresh", 0.2),
205 }
206
207 # Base Omni initialization parameters
208 omni_kwargs = {
209 "model": request.Model,
210 }

Callers

nothing calls this directly

Calls 5

_detect_model_type · Method · 0.95
setup_parsers · Function · 0.90
parse_options · Function · 0.85
get · Method · 0.45
update · Method · 0.45

Tested by

no test coverage detected