Class LoraManager

tensorrt_llm/lora_manager.py:640–1269 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

638
639
640	class LoraManager(object):
641	LORA_MODULE_IDS = {
642	"attn_qkv": 0,
643	"attn_q": 1,
644	"attn_k": 2,
645	"attn_v": 3,
646	"attn_dense": 4,
647	"mlp_h_to_4h": 5,
648	"mlp_4h_to_h": 6,
649	"mlp_gate": 7,
650	"cross_attn_qkv": 8,
651	"cross_attn_q": 9,
652	"cross_attn_k": 10,
653	"cross_attn_v": 11,
654	"cross_attn_dense": 12,
655	"moe_h_to_4h": 13,
656	"moe_4h_to_h": 14,
657	"moe_gate": 15,
658	"moe_router": 16,
659	"mlp_router": 17,
660	"mlp_gate_up": 18,
661	}
662
663	def __init__(
664	self,
665	*,
666	mapping: Mapping,
667	model_config: "ModelConfig",
668	cpp_peft_cache_manager: tb_internal.batch_manager.PeftCacheManager | None = None,
669	):
670	"""Constructor.
671
672	Args:
673	mapping (Mapping): Parallelism related information.
674	model_config (ModelConfig): model configuration python class instance.
675	cpp_peft_cache_manager (PeftCacheManager, optional): used by is_adapter_in_cpu_cache method, that's used for
676	a performance optimization with LoRA of not sending the LoRA adapter weights with every LLM request when
677	the adapter is already loaded in the LoRA CPU cache.
678	"""
679	# _lora_uid_to_low_ranks: dict[str -> dict[int -> dict[str -> int]]]
680	# {
681	# uid: {
682	# 0: {
683	# lora_module: int
684	# }, # layer_0_rank,
685	# 1: {
686	# lora_module: int
687	# }, # layer_1_rank,
688	# ...
689	# }
690	# }
691
692	# _lora_weights_pointers_list: dict[str -> dict[int -> dict[str -> [Tensor, Tensor]]]]
693	# {
694	# uid: {
695	# 0: {
696	# lora_module: [t_in, t_out]
697	# }, # layer_0,

__init__ · Method · 0.90

setup_engine · Method · 0.85

__init__ · Method · 0.85

from_engine · Method · 0.85

from_dir · Method · 0.85

no outgoing calls

no test coverage detected