(uid, model_dir, hf_config)
| 1038 | return t_out |
| 1039 | |
| 1040 | def load_from_model_dir(uid, model_dir, hf_config): |
| 1041 | if uid not in self._cpp_lora_weights: |
| 1042 | self._cpp_lora_weights[uid] = [] # Will be converted to tensor later |
| 1043 | if uid not in self._cpp_lora_config: |
| 1044 | self._cpp_lora_config[uid] = [] # Will be converted to tensor later |
| 1045 | |
| 1046 | lora_model = load_state_dict(get_model_path(model_dir, "adapter_model")) |
| 1047 | if lora_model is None: |
| 1048 | raise ValueError(f"Failed to load adapter_model from {model_dir}") |
| 1049 | lora_model = preprocess_lora_weights(lora_model, model_config) |
| 1050 | all_weights = get_all_hf_lora_weights(lora_model, hf_modules, component) |
| 1051 | rank = int(hf_config["r"]) |
| 1052 | rs_lora = bool(hf_config.get("use_rslora", False)) |
| 1053 | |
| 1054 | self._lora_uid_to_low_ranks[uid] = {} |
| 1055 | self._lora_weights_pointers_list[uid] = {} |
| 1056 | for layer_idx in sorted(all_weights.keys()): |
| 1057 | layer_weights = all_weights[layer_idx] |
| 1058 | self._lora_uid_to_low_ranks[uid][layer_idx] = {} |
| 1059 | self._lora_weights_pointers_list[uid][layer_idx] = {} |
| 1060 | |
| 1061 | for lora_module in self.missing_qkv_modules: |
| 1062 | hf_module = model_config.trtllm_modules_to_hf_modules[lora_module] |
| 1063 | if isinstance(hf_module, list): |
| 1064 | hf_module = hf_module[0] |
| 1065 | layer_weights[hf_module] = { |
| 1066 | "in": torch.zeros(rank, model_config.hidden_size), |
| 1067 | "out": torch.zeros(model_config.hidden_size, rank), |
| 1068 | } |
| 1069 | |
| 1070 | for hf_module, module_weights in layer_weights.items(): |
| 1071 | lora_module = hf_modules_to_trtllm_modules[hf_module] |
| 1072 | if lora_module not in self.lora_target_modules: |
| 1073 | warnings.warn( |
| 1074 | f"LoRA module '{lora_module}' not in target modules {self.lora_target_modules}, skipping." |
| 1075 | ) |
| 1076 | self._lora_uid_to_low_ranks[uid][layer_idx][lora_module] = 0 |
| 1077 | continue |
| 1078 | |
| 1079 | has_expert_indices = _is_moe_module_weights(module_weights) |
| 1080 | |
| 1081 | if has_expert_indices: # MoE |
| 1082 | # Validate and extract matrices in one pass |
| 1083 | expert_indices = sorted(module_weights.keys()) |
| 1084 | t_in_list, t_out_list = [], [] |
| 1085 | for expert_idx in expert_indices: |
| 1086 | expert_weights = module_weights[expert_idx] |
| 1087 | _check_lora_in_out( |
| 1088 | layer_idx=layer_idx, |
| 1089 | lora_module=f"{lora_module}_expert_{expert_idx}", |
| 1090 | available_matrices=expert_weights, |
| 1091 | source_identifier=f"directory {model_dir}", |
| 1092 | ) |
| 1093 | t_in_list.append(expert_weights["in"]) |
| 1094 | t_out_list.append(expert_weights["out"]) |
| 1095 | |
| 1096 | t_in = torch.stack(t_in_list) |
| 1097 | t_out = torch.stack(t_out_list) |
nothing calls this directly
no test coverage detected