(
self,
in_features,
out_features,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
quant_mode=QuantMode(0),
prefer_managed_weight=True,
)
| 172 | class SmoothQuantRowLinear(RowLinear): |
| 173 | |
def __init__(
    self,
    in_features,
    out_features,
    bias=True,
    dtype=None,
    tp_group=None,
    tp_size=1,
    quant_mode=QuantMode(0),
    prefer_managed_weight=True,
):
    """Create the int8 weight and the float32 scaling parameters.

    Args:
        in_features: Forwarded to the parent constructor.
        out_features: Forwarded to the parent constructor.
        bias: Forwarded to the parent constructor.
        dtype: Forwarded to the parent constructor only. It does NOT
            control the dtype of ``self.weight``, which is always
            ``"int8"`` here.
        tp_group: Forwarded to the parent constructor.
        tp_size: Forwarded to the parent constructor.
        quant_mode: Quantization mode; must report
            ``has_act_and_weight_quant()`` as true.
            NOTE(review): the default ``QuantMode(0)`` presumably has no
            flags set and would therefore be rejected -- confirm.
        prefer_managed_weight: Forwarded to the parent constructor.

    Raises:
        ValueError: If ``quant_mode`` does not enable activation+weight
            quantization.
    """
    # Parent initialisation runs first; the shapes below read
    # self.in_features / self.out_features / self.prefer_managed_weight
    # afterwards (presumably set by the parent constructor -- confirm).
    super().__init__(in_features,
                     out_features,
                     bias=bias,
                     dtype=dtype,
                     tp_group=tp_group,
                     tp_size=tp_size,
                     prefer_managed_weight=prefer_managed_weight)
    if not quant_mode.has_act_and_weight_quant():
        raise ValueError(
            "SmoothQuant Linear has to have act+weight quantization mode set"
        )

    # Past the guard above, act+weight quantization is always enabled, so
    # the weight dtype is unconditionally int8 and the per-channel scale
    # parameter is always created (no need to re-test the quant mode).
    self.weight = Parameter(shape=(self.out_features, self.in_features),
                            dtype="int8",
                            prefer_managed=self.prefer_managed_weight)
    self.smoother = Parameter(shape=(1, self.in_features), dtype="float32")

    # One scale per output feature with per-channel scaling, otherwise a
    # single scalar-shaped scale.
    if quant_mode.has_per_channel_scaling():
        scale_shape = (1, self.out_features)
    else:
        scale_shape = (1, 1)
    self.per_channel_scale = Parameter(shape=scale_shape, dtype="float32")

    # The activation scale parameter exists only for static activation
    # scaling; otherwise the attribute is not defined by this constructor.
    if quant_mode.has_act_static_scaling():
        self.act_scale = Parameter(shape=(1, 1), dtype="float32")

    self.quant_mode = quant_mode
| 214 | |
| 215 | def forward(self, x, lora_runtime_params=None, all_reduce_params=None): |
| 216 | assert lora_runtime_params is None, "lora is not supported on SmoothQuantRowLinear now" |
nothing calls this directly
no test coverage detected