(
self,
hidden_size,
ffn_hidden_size,
hidden_act,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
quant_mode=QuantMode(0),
)
| 1201 | class SmoothQuantMLP(Module): |
| 1202 | |
def __init__(
    self,
    hidden_size,
    ffn_hidden_size,
    hidden_act,
    bias=True,
    dtype=None,
    tp_group=None,
    tp_size=1,
    quant_mode=QuantMode(0),
):
    """Create the ``fc`` and ``proj`` quantized linear layers of the MLP.

    Args:
        hidden_size: Input size of ``fc`` and output size of ``proj``.
        ffn_hidden_size: Input size of ``proj``. Also the output size of
            ``fc``, except for ``'swiglu'`` where ``fc`` is twice as wide.
        hidden_act: Activation name; must be a key of ``ACT2FN``.
        bias: Forwarded as ``bias`` to both linear layers.
        dtype: Forwarded to both linear layers and stored on the module.
        tp_group: Forwarded to both linear layers (presumably the
            tensor-parallel group -- confirm against the linear classes).
        tp_size: Forwarded to both linear layers (presumably the
            tensor-parallel world size -- confirm against the linear
            classes).
        quant_mode: Forwarded to both linear layers and stored on the
            module; also decides whether a static activation scaling
            factor parameter is created.

    Raises:
        ValueError: If ``hidden_act`` is not a key of ``ACT2FN``.
    """
    super().__init__()

    if hidden_act not in ACT2FN:
        raise ValueError(
            'unsupported activation function: {}'.format(hidden_act))

    # 'swiglu' is the only activation that gets a double-width first
    # projection (presumably gate and value halves -- confirm in forward).
    if hidden_act == 'swiglu':
        fc_width = 2 * ffn_hidden_size
    else:
        fc_width = ffn_hidden_size

    # Keyword arguments that both linear layers receive unchanged.
    shared_kwargs = dict(
        bias=bias,
        dtype=dtype,
        tp_group=tp_group,
        tp_size=tp_size,
        quant_mode=quant_mode,
    )

    self.fc = SmoothQuantColumnLinear(
        hidden_size,
        fc_width,
        gather_output=False,
        **shared_kwargs,
    )
    self.proj = SmoothQuantRowLinear(
        ffn_hidden_size,
        hidden_size,
        **shared_kwargs,
    )

    self.hidden_act = hidden_act
    self.quant_mode = quant_mode
    self.dtype = dtype

    # Without static activation scaling the attribute is still registered,
    # but as None, so it exists on the module in both modes.
    if not self.quant_mode.has_act_static_scaling():
        self.register_parameter('quantization_scaling_factor', None)
        return

    self.quantization_scaling_factor = Parameter(shape=(1, ),
                                                 dtype='float32')
| 1245 | |
| 1246 | def forward(self, hidden_states, lora_layer_params=None): |
| 1247 |
nothing calls this directly
no test coverage detected