(self, hidden_states, lora_layer_params=None)
| 1743 | quant_mode=quant_mode) |
| 1744 | |
def forward(self, hidden_states, lora_layer_params=None):
    """Apply the gated MLP: ``proj(act(fc(x)) * gate(x))``.

    When ``self.quant_mode.has_fp8_rowwise()`` is true, the gated product
    is passed through ``quantize_fp8_per_token`` (together with the
    optional clamp value) before being handed to ``self.proj``.

    Args:
        hidden_states: Input fed to both ``self.fc`` and ``self.gate``.
        lora_layer_params: Must be ``None``; this layer rejects LoRA.

    Returns:
        The result of ``self.proj`` applied to the (optionally quantized)
        gated activation.

    Raises:
        AssertionError: If ``lora_layer_params`` is not ``None``.
    """
    assert lora_layer_params is None, f"lora is not supported on {self.__class__.__name__} now"

    # Up-projection followed by the configured activation, then the gate
    # branch; the two are combined with an element-wise product.
    activation_fn = ACT2FN[self.hidden_act]
    activated = activation_fn(self.fc(hidden_states))
    gated = activated * self.gate(hidden_states)

    if not self.quant_mode.has_fp8_rowwise():
        return self.proj(gated)

    # Quantize per token outputs tuple:
    # quantized tensor and scaling factors per token
    if self.clamp_val is None:
        clamp_bounds = None
    else:
        clamp_bounds = self.clamp_val.value
    quantized = quantize_fp8_per_token(gated, clamp_bounds)
    return self.proj(quantized)
| 1758 | |
| 1759 | |
| 1760 | class Fp8RowwiseFusedGatedMLP(Module): |
nothing calls this directly
no test coverage detected