(self, hidden_states)
| 2912 | self.dtype = dtype |
| 2913 | |
def forward(self, hidden_states):
    """Run the MLP: fc, activation, per-token quantization, then proj.

    Args:
        hidden_states: Input passed straight to ``self.fc``.

    Returns:
        Whatever ``self.proj`` returns for the quantized activations.
    """
    projected = self.fc(hidden_states)
    # The activation is selected by name from the ACT2FN table.
    activated = ACT2FN[self.hidden_act](projected)

    # Per-token sums are requested only when the quant mode does not
    # report per-group scaling.
    needs_token_sums = not self.quant_mode.has_per_group_scaling()
    quantized = quantize_per_token(
        activated,
        scale_dtype='float16',
        sum_per_token=needs_token_sums,
        sum_dtype='float16',
    )

    return self.proj(quantized)
| 2924 | |
| 2925 | |
| 2926 | # TODO: Mostly duplicates SmoothQuantGatedMLP. |
nothing calls this directly
no test coverage detected