hub / github.com/mudler/LocalAI / _load_pipeline

Method _load_pipeline

backend/python/diffusers/backend.py:218–492 · view source on GitHub ↗

Load a diffusers pipeline dynamically using the dynamic loader. This method uses load_diffusers_pipeline() for most pipelines, falling back to explicit handling only for pipelines requiring custom initialization (e.g., quantization, special VAE handling).

(self, request, modelFile, fromSingleFile, torchType, variant, device_map=None)

Source from the content-addressed store, hash-verified

216	class BackendServicer(backend_pb2_grpc.BackendServicer):
217
218	def _load_pipeline(self, request, modelFile, fromSingleFile, torchType, variant, device_map=None):
219	"""
220	Load a diffusers pipeline dynamically using the dynamic loader.
221
222	This method uses load_diffusers_pipeline() for most pipelines, falling back
223	to explicit handling only for pipelines requiring custom initialization
224	(e.g., quantization, special VAE handling).
225
226	Args:
227	request: The gRPC request containing pipeline configuration
228	modelFile: Path to the model file (for single file loading)
229	fromSingleFile: Whether to use from_single_file() vs from_pretrained()
230	torchType: The torch dtype to use
231	variant: Model variant (e.g., "fp16")
232	device_map: Device mapping strategy (e.g., "auto" for multi-GPU)
233
234	Returns:
235	The loaded pipeline instance
236	"""
237	pipeline_type = request.PipelineType
238
239	# Handle IMG2IMG request flag with default pipeline
240	if request.IMG2IMG and pipeline_type == "":
241	pipeline_type = "StableDiffusionImg2ImgPipeline"
242
243	# ================================================================
244	# Special cases requiring custom initialization logic
245	# Only handle pipelines that truly need custom code (quantization,
246	# special VAE handling, etc.). All other pipelines use dynamic loading.
247	# ================================================================
248
249	# FluxTransformer2DModel - requires quantization and custom transformer loading
250	if pipeline_type == "FluxTransformer2DModel":
251	dtype = torch.bfloat16
252	bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
253
254	transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype, device_map=device_map)
255	quantize(transformer, weights=qfloat8)
256	freeze(transformer)
257	text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype, device_map=device_map)
258	quantize(text_encoder_2, weights=qfloat8)
259	freeze(text_encoder_2)
260
261	pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype, device_map=device_map)
262	pipe.transformer = transformer
263	pipe.text_encoder_2 = text_encoder_2
264
265	if request.LowVRAM:
266	pipe.enable_model_cpu_offload()
267	return pipe
268
269	# WanPipeline - requires special VAE with float32 dtype
270	if pipeline_type == "WanPipeline":
271	vae = AutoencoderKLWan.from_pretrained(
272	request.Model,
273	subfolder="vae",
274	torch_dtype=torch.float32,
275	device_map=device_map

Callers 1

LoadModel · Method · 0.95

Calls 4

load_diffusers_pipeline · Function · 0.90

get_available_pipelines · Function · 0.90

get · Method · 0.45

to · Method · 0.45

Tested by

no test coverage detected