(self, model, base_precision, load_device, quantization,
compile_args=None, attention_mode="sdpa", block_swap_args=None, lora=None, vram_management_args=None, extra_model=None, vace_model=None,
fantasytalking_model=None, multitalk_model=None, fantasyportrait_model=None, rms_norm_function="default")
| 1106 | CATEGORY = "WanVideoWrapper" |
| 1107 | |
| 1108 | def loadmodel(self, model, base_precision, load_device, quantization, |
| 1109 | compile_args=None, attention_mode="sdpa", block_swap_args=None, lora=None, vram_management_args=None, extra_model=None, vace_model=None, |
| 1110 | fantasytalking_model=None, multitalk_model=None, fantasyportrait_model=None, rms_norm_function="default"): |
| 1111 | assert not (vram_management_args is not None and block_swap_args is not None), "Can't use both block_swap_args and vram_management_args at the same time" |
| 1112 | if vace_model is not None: |
| 1113 | extra_model = vace_model |
| 1114 | lora_low_mem_load = merge_loras = False |
| 1115 | if lora is not None: |
| 1116 | merge_loras = any(l.get("merge_loras", True) for l in lora) |
| 1117 | lora_low_mem_load = any(l.get("low_mem_load", False) for l in lora) |
| 1118 | |
| 1119 | transformer = None |
| 1120 | mm.unload_all_models() |
| 1121 | mm.cleanup_models() |
| 1122 | mm.soft_empty_cache() |
| 1123 | |
| 1124 | if "sage" in attention_mode: |
| 1125 | try: |
| 1126 | from sageattention import sageattn |
| 1127 | except Exception as e: |
| 1128 | raise ValueError(f"Can't import SageAttention: {str(e)}") |
| 1129 | |
| 1130 | gguf = False |
| 1131 | if model.endswith(".gguf"): |
| 1132 | if quantization != "disabled": |
| 1133 | raise ValueError("Quantization should be disabled when loading GGUF models.") |
| 1134 | quantization = "gguf" |
| 1135 | gguf = True |
| 1136 | if merge_loras is True: |
| 1137 | raise ValueError("GGUF models do not support LoRA merging, please disable merge_loras in the LoRA select node.") |
| 1138 | |
| 1139 | transformer_load_device = device if load_device == "main_device" else offload_device |
| 1140 | if lora is not None and not merge_loras: |
| 1141 | transformer_load_device = offload_device |
| 1142 | |
| 1143 | base_dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "fp8_e4m3fn_fast": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[base_precision] |
| 1144 | |
| 1145 | if base_precision == "fp16_fast": |
| 1146 | if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"): |
| 1147 | torch.backends.cuda.matmul.allow_fp16_accumulation = True |
| 1148 | else: |
| 1149 | raise ValueError("torch.backends.cuda.matmul.allow_fp16_accumulation is not available in this version of torch, requires torch 2.7.0.dev2025 02 26 nightly minimum currently") |
| 1150 | else: |
| 1151 | try: |
| 1152 | if hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation"): |
| 1153 | torch.backends.cuda.matmul.allow_fp16_accumulation = False |
| 1154 | except Exception: |
| 1155 | pass |
| 1156 | |
| 1157 | |
| 1158 | model_path = folder_paths.get_full_path_or_raise("diffusion_models", model) |
| 1159 | |
| 1160 | gguf_reader = None |
| 1161 | if not gguf: |
| 1162 | sd = load_torch_file(model_path, device=transformer_load_device, safe_load=True) |
| 1163 | else: |
| 1164 | gguf_reader=[] |
| 1165 | from .gguf.gguf import load_gguf |
Nothing calls `loadmodel` directly.
No test coverage was detected for `loadmodel`.