MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/quantization/layers.py:803–848  ·  view source on GitHub ↗
(
        self,
        in_features,
        out_features,
        bias=True,
        dtype=None,
        tp_group=None,
        tp_size=1,
        tp_rank=0,
        quant_mode=QuantMode.use_weight_only(),
        prefer_managed_weight=True,
        is_expert=False,
    )

Source from the content-addressed store, hash-verified

801class WeightOnlyQuantRowLinear(RowLinear):
802
def __init__(
    self,
    in_features,
    out_features,
    bias=True,
    dtype=None,
    tp_group=None,
    tp_size=1,
    tp_rank=0,
    quant_mode=QuantMode.use_weight_only(),
    prefer_managed_weight=True,
    is_expert=False,
):
    """Initialize the weight-only quantized variant of ``RowLinear``.

    Both feature counts are rounded up to the next multiple of
    ``64 * tp_size`` before being handed to the parent constructor;
    ``self.is_padded`` records whether either one had to be rounded.

    Args:
        in_features: Requested input feature count (before padding).
        out_features: Requested output feature count (before padding).
        bias: Forwarded unchanged to the parent constructor.
        dtype: Forwarded to the parent constructor and also used as the
            dtype of ``per_channel_scale``.
        tp_group: Forwarded unchanged to the parent constructor.
        tp_size: Forwarded to the parent constructor; also scales the
            padding multiple (``64 * tp_size``).
        tp_rank: Stored on the instance as ``self.tp_rank``.
        quant_mode: Quantization mode object. It must report either
            int8 or int4 weight-only quantization.
        prefer_managed_weight: Forwarded to the parent constructor and
            to the ``weight`` parameter created here.
        is_expert: Forwarded unchanged to the parent constructor.

    Raises:
        ValueError: If ``quant_mode`` is neither int8 nor int4
            weight-only. Previously this path left
            ``weight_only_quant_mode`` unassigned by this method.
    """
    multiple = 64 * tp_size

    # Set before calling the parent constructor, exactly as the original
    # did; the parent's attribute handling is not visible from here.
    self.is_padded = False
    if in_features % multiple != 0:
        in_features = math.ceil(in_features / multiple) * multiple
        self.is_padded = True
    if out_features % multiple != 0:
        out_features = math.ceil(out_features / multiple) * multiple
        self.is_padded = True

    super().__init__(
        in_features,
        out_features,
        bias=bias,
        dtype=dtype,
        tp_group=tp_group,
        tp_size=tp_size,
        prefer_managed_weight=prefer_managed_weight,
        is_expert=is_expert,
    )

    # The mode value doubles as the divisor applied to the weight's second
    # dimension below: 1 for int8, 2 for int4.
    if quant_mode.is_int8_weight_only():
        self.weight_only_quant_mode = 1
    elif quant_mode.is_int4_weight_only():
        self.weight_only_quant_mode = 2
    else:
        # Fail with a clear message rather than leaving the attribute
        # unset for the shape computation that follows.
        raise ValueError(
            "WeightOnlyQuantRowLinear requires an int8 or int4 "
            f"weight-only quant_mode, got {quant_mode!r}")

    # The weight is declared as int8 in both modes (a placeholder tensor
    # per the original note). NOTE(review): presumably int4 packs two
    # values per int8 element, hence the halved dimension -- confirm.
    packed_out_features = self.out_features // self.weight_only_quant_mode
    self.weight = Parameter(
        shape=(self.in_features, packed_out_features),
        dtype="int8",
        prefer_managed=prefer_managed_weight,
    )
    self.per_channel_scale = Parameter(shape=(self.out_features, ),
                                       dtype=dtype)

    self.tp_rank = tp_rank
    # Only override tp_dim when padding was applied; otherwise whatever
    # the parent constructor set is left untouched.
    if self.is_padded:
        self.tp_dim = -1
    self.quant_mode = quant_mode
849
850 def forward(self, x, lora_runtime_params=None, all_reduce_params=None):
851 hidden_state = x

Callers

nothing calls this directly

Calls 5

ParameterClass · 0.85
use_weight_onlyMethod · 0.80
is_int8_weight_onlyMethod · 0.80
is_int4_weight_onlyMethod · 0.80
__init__Method · 0.45

Tested by

no test coverage detected