MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / load_from_nemo

Method load_from_nemo

tensorrt_llm/lora_manager.py:769–875  ·  view source on GitHub ↗

Returns the adapter UIDs that were loaded by this call. Note that an adapter that was already loaded before this call is not included in the returned list of UIDs.

(
        self,
        model_files: List[str],
        model_config: Union["ModelConfig", LoraModelConfig],
        uids: Optional[List[str]] = None,
    )

Source from the content-addressed store, hash-verified

767 assert False, f"{self.__class__.__name__} does not support source {ckpt_source}"
768
769 def load_from_nemo(
770 self,
771 model_files: List[str],
772 model_config: Union["ModelConfig", LoraModelConfig],
773 uids: Optional[List[str]] = None,
774 ) -> List[str]:
775 """Returns the adapter UIDs that were loaded by this call.
776
777 Note that when an adapter was already loaded before this call, it would not be
778 included in the returned list of UIDs.
779 """
780 if uids is None:
781 uids = [self._generate_uid() for _ in range(len(model_files))]
782 assert len(uids) == len(model_files)
783
784 new_uids, new_model_files = [], []
785 for uid, model_file in zip(uids, model_files):
786 if uid in self._lora_uid_to_low_ranks:
787 continue
788 new_uids.append(uid)
789 new_model_files.append(model_file)
790
791 if len(new_uids) == 0:
792 return new_uids
793
794 self.lora_target_modules = model_config.lora_target_modules
795
796 def load_from_model_file(uid, model_file):
797 if uid not in self._cpp_lora_weights:
798 self._cpp_lora_weights[uid] = [] # Will be converted to tensor later
799 if uid not in self._cpp_lora_config:
800 self._cpp_lora_config[uid] = [] # Will be converted to tensor later
801
802 _, nemo_weights = unpack_nemo_weights(model_file)
803 all_lora_weights = get_all_nemo_lora_weights(nemo_weights)
804
805 self._lora_uid_to_low_ranks[uid] = {}
806 self._lora_weights_pointers_list[uid] = {}
807 for layer_idx in sorted(all_lora_weights.keys()):
808 self._lora_uid_to_low_ranks[uid][layer_idx] = {}
809 self._lora_weights_pointers_list[uid][layer_idx] = {}
810
811 for lora_module in self.lora_target_modules:
812 if lora_module not in NEMO_SUPPORTED_LORA_MODULES:
813 warnings.warn(
814 f"LoRA module '{lora_module}' not supported in NeMo loading for "
815 f"layer {layer_idx}, skipping. NeMo LoRA currently only supports "
816 f"{NEMO_SUPPORTED_LORA_MODULES} modules."
817 )
818 self._lora_uid_to_low_ranks[uid][layer_idx][lora_module] = 0
819 continue
820
821 if lora_module == "attn_qkv":
822 # Validate required matrices are present
823 _check_lora_in_out(
824 layer_idx=layer_idx,
825 lora_module=lora_module,
826 available_matrices=all_lora_weights[layer_idx],

Callers 1

load_from_ckptMethod · 0.95

Calls 4

_generate_uidMethod · 0.95
release_gcFunction · 0.85
appendMethod · 0.45
infoMethod · 0.45

Tested by

no test coverage detected