hub / github.com/mudler/LocalAI / LoadModel

Method LoadModel

backend/python/vllm-omni/backend.py:153–272 · view source on GitHub ↗

(self, request, context)

Source from the content-addressed store, hash-verified

151	return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
152
153	def LoadModel(self, request, context):
154	try:
155	# CPU detection: if no CUDA, default vLLM target device to CPU.
156	try:
157	if not torch.cuda.is_available():
158	os.environ.setdefault("VLLM_TARGET_DEVICE", "cpu")
159	os.environ.setdefault("VLLM_CPU_KVCACHE_SPACE", "4")
160	except Exception:
161	pass
162
163	print(f"Loading model {request.Model}...", file=sys.stderr)
164	print(f"Request {request}", file=sys.stderr)
165
166	# Parse options from request.Options using shared helper
167	self.options = parse_options(request.Options)
168	opts = self.options
169
170	print(f"Options: {self.options}", file=sys.stderr)
171
172	# Detect model type
173	self.model_name = request.Model
174	self.model_type = request.Type if request.Type else self._detect_model_type(request.Model)
175	print(f"Detected model type: {self.model_type}", file=sys.stderr)
176
177	# Build DiffusionParallelConfig if diffusion model (image or video)
178	parallel_config = None
179	if self.model_type in ["image", "video"]:
180	parallel_config = DiffusionParallelConfig(
181	ulysses_degree=self.options.get("ulysses_degree", 1),
182	ring_degree=self.options.get("ring_degree", 1),
183	cfg_parallel_size=self.options.get("cfg_parallel_size", 1),
184	tensor_parallel_size=self.options.get("tensor_parallel_size", 1),
185	)
186
187	# Build cache_config dict if cache_backend specified
188	cache_backend = self.options.get("cache_backend") # "cache_dit" or "tea_cache"
189	cache_config = None
190	if cache_backend == "cache_dit":
191	cache_config = {
192	"Fn_compute_blocks": self.options.get("cache_dit_fn_compute_blocks", 1),
193	"Bn_compute_blocks": self.options.get("cache_dit_bn_compute_blocks", 0),
194	"max_warmup_steps": self.options.get("cache_dit_max_warmup_steps", 4),
195	"residual_diff_threshold": self.options.get("cache_dit_residual_diff_threshold", 0.24),
196	"max_continuous_cached_steps": self.options.get("cache_dit_max_continuous_cached_steps", 3),
197	"enable_taylorseer": self.options.get("cache_dit_enable_taylorseer", False),
198	"taylorseer_order": self.options.get("cache_dit_taylorseer_order", 1),
199	"scm_steps_mask_policy": self.options.get("cache_dit_scm_steps_mask_policy"),
200	"scm_steps_policy": self.options.get("cache_dit_scm_steps_policy", "dynamic"),
201	}
202	elif cache_backend == "tea_cache":
203	cache_config = {
204	"rel_l1_thresh": self.options.get("tea_cache_rel_l1_thresh", 0.2),
205	}
206
207	# Base Omni initialization parameters
208	omni_kwargs = {
209	"model": request.Model,
210	}

Callers

nothing calls this directly

Calls 5

_detect_model_type · Method · 0.95

setup_parsers · Function · 0.90

parse_options · Function · 0.85

get · Method · 0.45

update · Method · 0.45

Tested by

no test coverage detected