MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/quantization/layers.py:1203–1244  ·  view source on GitHub ↗
(
            self,
            hidden_size,
            ffn_hidden_size,
            hidden_act,
            bias=True,
            dtype=None,
            tp_group=None,
            tp_size=1,
            quant_mode=QuantMode(0),
    )

Source from the content-addressed store, hash-verified

1201class SmoothQuantMLP(Module):
1202
def __init__(
        self,
        hidden_size,
        ffn_hidden_size,
        hidden_act,
        bias=True,
        dtype=None,
        tp_group=None,
        tp_size=1,
        quant_mode=QuantMode(0),
):
    """Build the two SmoothQuant linear layers that make up the MLP.

    Args:
        hidden_size: Input width of ``fc`` and output width of ``proj``.
        ffn_hidden_size: Input width of ``proj``; also the output width
            of ``fc``, except for ``'swiglu'`` where ``fc`` is twice
            as wide.
        hidden_act: Activation name; must be a key of ``ACT2FN``.
        bias: Forwarded unchanged to both linear layers.
        dtype: Forwarded unchanged to both linear layers and stored on
            the instance.
        tp_group: Forwarded unchanged to both linear layers.
        tp_size: Forwarded unchanged to both linear layers.
        quant_mode: Forwarded unchanged to both linear layers and stored
            on the instance; it also decides whether a static activation
            scaling parameter is created.

    Raises:
        ValueError: If ``hidden_act`` is not a key of ``ACT2FN``.
    """
    super().__init__()

    # Reject unknown activations before any layer is constructed.
    if hidden_act not in ACT2FN:
        message = 'unsupported activation function: {}'.format(hidden_act)
        raise ValueError(message)

    # 'swiglu' is the only activation for which fc is twice as wide.
    if hidden_act == 'swiglu':
        fc_width = 2 * ffn_hidden_size
    else:
        fc_width = ffn_hidden_size

    # Keyword arguments that both linear layers receive identically.
    shared_kwargs = dict(
        bias=bias,
        dtype=dtype,
        tp_group=tp_group,
        tp_size=tp_size,
        quant_mode=quant_mode,
    )

    self.fc = SmoothQuantColumnLinear(hidden_size,
                                      fc_width,
                                      gather_output=False,
                                      **shared_kwargs)
    self.proj = SmoothQuantRowLinear(ffn_hidden_size, hidden_size,
                                     **shared_kwargs)

    self.hidden_act = hidden_act
    self.quant_mode = quant_mode
    self.dtype = dtype

    # Without static activation scaling the attribute is still
    # registered, but as None; otherwise it is a one-element float32
    # parameter.
    if not self.quant_mode.has_act_static_scaling():
        self.register_parameter('quantization_scaling_factor', None)
    else:
        self.quantization_scaling_factor = Parameter(shape=(1, ),
                                                     dtype='float32')
1245
1246 def forward(self, hidden_states, lora_layer_params=None):
1247

Callers

nothing calls this directly

Calls 6

QuantMode (Class) · 0.85
Parameter (Class) · 0.85
__init__ (Method) · 0.45
register_parameter (Method) · 0.45

Tested by

no test coverage detected