hub / github.com/NVIDIA/TensorRT-LLM / load_from_nemo

Method load_from_nemo

tensorrt_llm/lora_manager.py:769–875 · view source on GitHub ↗

Returns the UIDs of the adapters that were loaded by this call. Adapters that were already loaded before this call are skipped and are not included in the returned list of UIDs.

(
        self,
        model_files: List[str],
        model_config: Union["ModelConfig", LoraModelConfig],
        uids: Optional[List[str]] = None,
    )

Source from the content-addressed store, hash-verified

767	assert False, f"{self.__class__.__name__} does not support source {ckpt_source}"
768
769	def load_from_nemo(
770	self,
771	model_files: List[str],
772	model_config: Union["ModelConfig", LoraModelConfig],
773	uids: Optional[List[str]] = None,
774	) -> List[str]:
775	"""Returns the adapter UIDs that were loaded by this call.
776
777	Note that when an adapter was already loaded before this call, it would not be
778	included in the returned list of UIDs.
779	"""
780	if uids is None:
781	uids = [self._generate_uid() for _ in range(len(model_files))]
782	assert len(uids) == len(model_files)
783
784	new_uids, new_model_files = [], []
785	for uid, model_file in zip(uids, model_files):
786	if uid in self._lora_uid_to_low_ranks:
787	continue
788	new_uids.append(uid)
789	new_model_files.append(model_file)
790
791	if len(new_uids) == 0:
792	return new_uids
793
794	self.lora_target_modules = model_config.lora_target_modules
795
796	def load_from_model_file(uid, model_file):
797	if uid not in self._cpp_lora_weights:
798	self._cpp_lora_weights[uid] = [] # Will be converted to tensor later
799	if uid not in self._cpp_lora_config:
800	self._cpp_lora_config[uid] = [] # Will be converted to tensor later
801
802	_, nemo_weights = unpack_nemo_weights(model_file)
803	all_lora_weights = get_all_nemo_lora_weights(nemo_weights)
804
805	self._lora_uid_to_low_ranks[uid] = {}
806	self._lora_weights_pointers_list[uid] = {}
807	for layer_idx in sorted(all_lora_weights.keys()):
808	self._lora_uid_to_low_ranks[uid][layer_idx] = {}
809	self._lora_weights_pointers_list[uid][layer_idx] = {}
810
811	for lora_module in self.lora_target_modules:
812	if lora_module not in NEMO_SUPPORTED_LORA_MODULES:
813	warnings.warn(
814	f"LoRA module '{lora_module}' not supported in NeMo loading for "
815	f"layer {layer_idx}, skipping. NeMo LoRA currently only supports "
816	f"{NEMO_SUPPORTED_LORA_MODULES} modules."
817	)
818	self._lora_uid_to_low_ranks[uid][layer_idx][lora_module] = 0
819	continue
820
821	if lora_module == "attn_qkv":
822	# Validate required matrices are present
823	_check_lora_in_out(
824	layer_idx=layer_idx,
825	lora_module=lora_module,
826	available_matrices=all_lora_weights[layer_idx],

Callers 1

load_from_ckptMethod · 0.95

Calls 4

_generate_uidMethod · 0.95

release_gcFunction · 0.85

appendMethod · 0.45

infoMethod · 0.45

Tested by

no test coverage detected