hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/quantization/layers.py:803–848 · view source on GitHub ↗

(
        self,
        in_features,
        out_features,
        bias=True,
        dtype=None,
        tp_group=None,
        tp_size=1,
        tp_rank=0,
        quant_mode=QuantMode.use_weight_only(),
        prefer_managed_weight=True,
        is_expert=False,
    )

Source from the content-addressed store, hash-verified

801	class WeightOnlyQuantRowLinear(RowLinear):
802
def __init__(
    self,
    in_features,
    out_features,
    bias=True,
    dtype=None,
    tp_group=None,
    tp_size=1,
    tp_rank=0,
    quant_mode=QuantMode.use_weight_only(),
    prefer_managed_weight=True,
    is_expert=False,
):
    """Build a row-parallel linear layer whose weight is stored weight-only quantized.

    ``in_features`` and ``out_features`` are each rounded up to the next
    multiple of ``64 * tp_size`` before being handed to
    ``RowLinear.__init__``; ``self.is_padded`` records whether either
    dimension had to be rounded.

    Args:
        in_features: Requested input feature count (before padding).
        out_features: Requested output feature count (before padding).
        bias: Forwarded to ``RowLinear.__init__``.
        dtype: Forwarded to ``RowLinear.__init__`` and used as the dtype
            of ``per_channel_scale``.
        tp_group: Forwarded to ``RowLinear.__init__``.
        tp_size: Forwarded to ``RowLinear.__init__``; also scales the
            padding multiple.
        tp_rank: Stored on ``self.tp_rank``.
        quant_mode: Must report int8 or int4 weight-only; selects
            ``self.weight_only_quant_mode`` (1 for int8, 2 for int4) and
            is stored on ``self.quant_mode``.
        prefer_managed_weight: Forwarded to ``RowLinear.__init__`` and to
            the quantized weight ``Parameter``.
        is_expert: Forwarded to ``RowLinear.__init__``.

    Raises:
        ValueError: If ``quant_mode`` is neither int8 nor int4
            weight-only.
    """
    # Round both dimensions up to a multiple of 64 per tensor-parallel rank.
    # NOTE(review): presumably an alignment requirement of the weight-only
    # GEMM kernels -- confirm.
    multiple = 64 * tp_size
    # Set before super().__init__() exactly as the original did.
    self.is_padded = False
    if in_features % multiple > 0:
        in_features = math.ceil(in_features / multiple) * multiple
        self.is_padded = True
    if out_features % multiple > 0:
        out_features = math.ceil(out_features / multiple) * multiple
        self.is_padded = True

    super().__init__(in_features,
                     out_features,
                     bias=bias,
                     dtype=dtype,
                     tp_group=tp_group,
                     tp_size=tp_size,
                     prefer_managed_weight=prefer_managed_weight,
                     is_expert=is_expert)

    if quant_mode.is_int8_weight_only():
        self.weight_only_quant_mode = 1
    elif quant_mode.is_int4_weight_only():
        self.weight_only_quant_mode = 2
    else:
        # Previously this fell through with the attribute unset and failed
        # on the very next statement with an unrelated-looking attribute
        # error; report the real cause instead.
        raise ValueError(
            "WeightOnlyQuantRowLinear requires an int8 or int4 weight-only "
            f"quant_mode, got {quant_mode!r}")

    # The weight is declared as an int8 placeholder tensor. In int4 mode its
    # second dimension is halved (presumably two 4-bit values are packed
    # into each int8 element -- confirm). Integer floor division keeps the
    # shape arithmetic exact instead of round-tripping through float.
    self.weight = Parameter(
        shape=(self.in_features,
               self.out_features // self.weight_only_quant_mode),
        dtype="int8",
        prefer_managed=prefer_managed_weight)
    self.per_channel_scale = Parameter(shape=(self.out_features, ),
                                       dtype=dtype)
    self.tp_rank = tp_rank
    if self.is_padded:
        # NOTE(review): -1 presumably disables splitting along a TP
        # dimension for padded shapes -- confirm against the weight loader.
        self.tp_dim = -1
    self.quant_mode = quant_mode
849
850	def forward(self, x, lora_runtime_params=None, all_reduce_params=None):
851	hidden_state = x

Callers

nothing calls this directly

Calls 5

ParameterClass · 0.85

use_weight_onlyMethod · 0.80

is_int8_weight_onlyMethod · 0.80

is_int4_weight_onlyMethod · 0.80

__init__Method · 0.45

Tested by

no test coverage detected

Method __init__

Source from the content-addressed store, hash-verified

Callers

Calls 5

Tested by

Method __init__