MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/quantization/layers.py:174–213  ·  view source on GitHub ↗
(
            self,
            in_features,
            out_features,
            bias=True,
            dtype=None,
            tp_group=None,
            tp_size=1,
            quant_mode=QuantMode(0),
            prefer_managed_weight=True,
    )

Source from the content-addressed store, hash-verified

172class SmoothQuantRowLinear(RowLinear):
173
def __init__(
    self,
    in_features,
    out_features,
    bias=True,
    dtype=None,
    tp_group=None,
    tp_size=1,
    quant_mode=QuantMode(0),
    prefer_managed_weight=True,
):
    """Create the int8 weight and float32 scaling parameters for SmoothQuant.

    Every argument except ``quant_mode`` is forwarded unchanged to the
    parent constructor.

    Args:
        in_features: Forwarded to the parent constructor.
        out_features: Forwarded to the parent constructor.
        bias: Forwarded to the parent constructor.
        dtype: Forwarded to the parent constructor. It does not influence
            the weight created here, which is always ``"int8"``.
        tp_group: Forwarded to the parent constructor.
        tp_size: Forwarded to the parent constructor.
        quant_mode: Quantization mode. It must report
            ``has_act_and_weight_quant()``; it is also queried for
            per-channel scaling and static activation scaling, and is
            stored on the instance.
        prefer_managed_weight: Forwarded to the parent constructor.

    Raises:
        ValueError: If ``quant_mode`` does not have act+weight
            quantization set.
    """
    super().__init__(
        in_features,
        out_features,
        bias=bias,
        dtype=dtype,
        tp_group=tp_group,
        tp_size=tp_size,
        prefer_managed_weight=prefer_managed_weight,
    )
    if not quant_mode.has_act_and_weight_quant():
        raise ValueError(
            "SmoothQuant Linear has to have act+weight quantization mode set"
        )

    # Past the guard above, act+weight quantization is known to be enabled,
    # so the weight dtype and the per-channel scale need no further checks.
    # NOTE(review): in_features / out_features / prefer_managed_weight are
    # read back from self, presumably as set by the parent constructor --
    # confirm against RowLinear.
    self.weight = Parameter(
        shape=(self.out_features, self.in_features),
        dtype="int8",
        prefer_managed=self.prefer_managed_weight,
    )
    self.smoother = Parameter(shape=(1, self.in_features), dtype="float32")

    # One scale per output feature when per-channel scaling is requested,
    # otherwise a single scalar scale.
    if quant_mode.has_per_channel_scaling():
        scale_shape = (1, self.out_features)
    else:
        scale_shape = (1, 1)
    self.per_channel_scale = Parameter(shape=scale_shape, dtype="float32")

    # The activation scale parameter exists only in static-scaling mode.
    if quant_mode.has_act_static_scaling():
        self.act_scale = Parameter(shape=(1, 1), dtype="float32")

    self.quant_mode = quant_mode
214
215 def forward(self, x, lora_runtime_params=None, all_reduce_params=None):
216 assert lora_runtime_params is None, "lora is not supported on SmoothQuantRowLinear now"

Callers

nothing calls this directly

Calls 6

QuantModeClass · 0.85
ParameterClass · 0.85
__init__Method · 0.45

Tested by

no test coverage detected