MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/layers/moe.py:740–869  ·  view source on GitHub ↗
(self,
                 moe_config: MoeConfig,
                 hidden_size: int,
                 ffn_hidden_size: int,
                 hidden_act: str,
                 mapping: Mapping = Mapping(),
                 bias: bool = True,
                 dtype=None,
                 tp_group: List[int] = None,
                 tp_size: int = 1,
                 quant_mode=QuantMode(0),
                 use_all_reduce=True,
                 pre_quant_scale=False,
                 zero=False,
                 use_w4a8_awq=False,
                 use_int8_weight=False,
                 group_size: int = -1,
                 static_routing=False)

Source from the content-addressed store, hash-verified

738class MixtureOfExperts(Module):
739
740 def __init__(self,
741 moe_config: MoeConfig,
742 hidden_size: int,
743 ffn_hidden_size: int,
744 hidden_act: str,
745 mapping: Mapping = Mapping(),
746 bias: bool = True,
747 dtype=None,
748 tp_group: List[int] = None,
749 tp_size: int = 1,
750 quant_mode=QuantMode(0),
751 use_all_reduce=True,
752 pre_quant_scale=False,
753 zero=False,
754 use_w4a8_awq=False,
755 use_int8_weight=False,
756 group_size: int = -1,
757 static_routing=False):
758 super().__init__()
759
760 self.moe_config = moe_config
761 self.num_experts = moe_config.num_experts
762 self.top_k = moe_config.top_k
763
764 self.hidden_act = hidden_act
765 self.hidden_size = hidden_size
766 self.ffn_hidden_size = ffn_hidden_size
767 self.expert_inter_size = ffn_hidden_size
768 self.dtype = dtype
769 self.weight_dtype = dtype
770 self.tp_group = tp_group
771 self.tp_size = tp_size
772 self.mapping = mapping
773 self.quant_mode = quant_mode
774 self.bias = bias
775 self.use_all_reduce = use_all_reduce
776 self.zero = zero
777 self.pre_quant_scale = pre_quant_scale
778 self.use_w4a8_awq = use_w4a8_awq
779 self.use_int8_weight = use_int8_weight
780 self.group_size = group_size
781
782 if self.use_int8_weight and self.group_size > 0:
783 raise NotImplementedError("INT8-GPTQ is not implemented for MoE.")
784
785 self.static_routing = static_routing
786
787 self.experts_per_node = self.num_experts
788 if self.mapping.has_moe_ep():
789 if self.num_experts % self.mapping.moe_ep_size != 0:
790 raise ValueError(
791 f"MixtureOfExperts - Number of experts {self.num_experts} is not a multiple of EP size {self.mapping.moe_ep_size}"
792 )
793 self.experts_per_node = self.experts_per_node // self.mapping.moe_ep_size
794
795 if self.mapping.has_moe_tp():
796 if self.ffn_hidden_size % self.mapping.moe_tp_size != 0:
797 raise ValueError(

Callers

nothing calls this directly

Calls 13

init_experts · Method · 0.95
Mapping · Class · 0.85
QuantMode · Class · 0.85
RowLinear · Class · 0.85
has_moe_ep · Method · 0.80
has_moe_tp · Method · 0.80
is_weight_only · Method · 0.80
ep_experts · Method · 0.80
__init__ · Method · 0.45
has_fp8_rowwise · Method · 0.45
has_fp8_qdq · Method · 0.45
update · Method · 0.45

Tested by

no test coverage detected