MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/quantization/layers.py:637–668  ·  view source on GitHub ↗
(
            self,
            in_features,
            out_features,
            bias=True,
            dtype=None,
            tp_group=None,
            tp_size=1,
            quant_mode=QuantMode(0),
    )

Source from the content-addressed store, hash-verified

635class Fp8RowwiseRowLinear(RowLinear):
636
def __init__(
    self,
    in_features,
    out_features,
    bias=True,
    dtype=None,
    tp_group=None,
    tp_size=1,
    quant_mode=QuantMode(0),
):
    """Set up the FP8 row-wise quantized variant of ``RowLinear``.

    The parent constructor runs first with the forwarded arguments; the
    ``weight`` parameter is then replaced by an ``"fp8"`` parameter and a
    ``float32`` ``per_channel_scale`` parameter is added.

    Args:
        in_features: Forwarded to ``RowLinear.__init__``.
        out_features: Forwarded to ``RowLinear.__init__``.
        bias: Forwarded to ``RowLinear.__init__``.
        dtype: Forwarded to ``RowLinear.__init__``. It does not affect
            the dtype of ``self.weight``, which is always ``"fp8"`` here.
        tp_group: Forwarded to ``RowLinear.__init__``.
        tp_size: Forwarded to ``RowLinear.__init__``.
        quant_mode: Quantization mode; ``quant_mode.has_fp8_rowwise()``
            must be true.

    Raises:
        ValueError: If ``quant_mode.has_fp8_rowwise()`` is false. The
            check runs after the parent constructor has completed.
    """
    super().__init__(in_features,
                     out_features,
                     bias=bias,
                     dtype=dtype,
                     tp_group=tp_group,
                     tp_size=tp_size)
    if not quant_mode.has_fp8_rowwise():
        raise ValueError(
            "Fp8 Rowwise Linear has to have act+weight quantization mode set"
        )

    # Past the guard above FP8 row-wise mode is guaranteed, so the weight
    # dtype is unconditionally "fp8" and the scale is always created.
    # self.out_features / self.in_features are read back from the instance
    # (presumably set by the parent constructor -- confirm), not taken
    # from the arguments.
    self.weight = Parameter(shape=(self.out_features, self.in_features),
                            dtype="fp8")
    self.per_channel_scale = Parameter(shape=(self.out_features, ),
                                       dtype="float32")

    self.quant_mode = quant_mode
    # NOTE(review): the attribute name is spelled "externel" in the
    # original; kept as-is because code outside this block presumably
    # looks it up under this exact name -- confirm before renaming.
    self.tllm_to_externel_key_dict = {"weight": ["weight", "weight_scale"]}
669
670 def forward(self, x, lora_runtime_params=None, all_reduce_params=None):
671 assert lora_runtime_params is None, "lora is not supported on SmoothQuantRowLinear now"

Callers

nothing calls this directly

Calls 4

QuantModeClass · 0.85
ParameterClass · 0.85
__init__Method · 0.45
has_fp8_rowwiseMethod · 0.45

Tested by

no test coverage detected