Load a diffusers pipeline dynamically using the dynamic loader. This method uses load_diffusers_pipeline() for most pipelines, falling back to explicit handling only for pipelines requiring custom initialization (e.g., quantization, special VAE handling).
(self, request, modelFile, fromSingleFile, torchType, variant, device_map=None)
| 216 | class BackendServicer(backend_pb2_grpc.BackendServicer): |
| 217 | |
| 218 | def _load_pipeline(self, request, modelFile, fromSingleFile, torchType, variant, device_map=None): |
| 219 | """ |
| 220 | Load a diffusers pipeline dynamically using the dynamic loader. |
| 221 | |
| 222 | This method uses load_diffusers_pipeline() for most pipelines, falling back |
| 223 | to explicit handling only for pipelines requiring custom initialization |
| 224 | (e.g., quantization, special VAE handling). |
| 225 | |
| 226 | Args: |
| 227 | request: The gRPC request containing pipeline configuration |
| 228 | modelFile: Path to the model file (for single file loading) |
| 229 | fromSingleFile: Whether to use from_single_file() vs from_pretrained() |
| 230 | torchType: The torch dtype to use |
| 231 | variant: Model variant (e.g., "fp16") |
| 232 | device_map: Device mapping strategy (e.g., "auto" for multi-GPU) |
| 233 | |
| 234 | Returns: |
| 235 | The loaded pipeline instance |
| 236 | """ |
| 237 | pipeline_type = request.PipelineType |
| 238 | |
| 239 | # Handle IMG2IMG request flag with default pipeline |
| 240 | if request.IMG2IMG and pipeline_type == "": |
| 241 | pipeline_type = "StableDiffusionImg2ImgPipeline" |
| 242 | |
| 243 | # ================================================================ |
| 244 | # Special cases requiring custom initialization logic |
| 245 | # Only handle pipelines that truly need custom code (quantization, |
| 246 | # special VAE handling, etc.). All other pipelines use dynamic loading. |
| 247 | # ================================================================ |
| 248 | |
| 249 | # FluxTransformer2DModel - requires quantization and custom transformer loading |
| 250 | if pipeline_type == "FluxTransformer2DModel": |
| 251 | dtype = torch.bfloat16 |
| 252 | bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev") |
| 253 | |
| 254 | transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype, device_map=device_map) |
| 255 | quantize(transformer, weights=qfloat8) |
| 256 | freeze(transformer) |
| 257 | text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype, device_map=device_map) |
| 258 | quantize(text_encoder_2, weights=qfloat8) |
| 259 | freeze(text_encoder_2) |
| 260 | |
| 261 | pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype, device_map=device_map) |
| 262 | pipe.transformer = transformer |
| 263 | pipe.text_encoder_2 = text_encoder_2 |
| 264 | |
| 265 | if request.LowVRAM: |
| 266 | pipe.enable_model_cpu_offload() |
| 267 | return pipe |
| 268 | |
| 269 | # WanPipeline - requires special VAE with float32 dtype |
| 270 | if pipeline_type == "WanPipeline": |
| 271 | vae = AutoencoderKLWan.from_pretrained( |
| 272 | request.Model, |
| 273 | subfolder="vae", |
| 274 | torch_dtype=torch.float32, |
| 275 | device_map=device_map |
no test coverage detected