(self, hidden_states, lora_layer_params=None)
| 1808 | self.register_parameter('clamp_val', None) |
| 1809 | |
def forward(self, hidden_states, lora_layer_params=None):
    """Apply the fused gated MLP to ``hidden_states``.

    The input goes through ``self.fused_fc``, then through the gated
    activation selected by ``self.hidden_act`` ('silu' uses
    ``ACT2FN['swiglu']``, 'gelu' uses ``ACT2FN['geglu']``). When the
    quant mode reports FP8 rowwise, the result is quantized per token
    before being handed to ``self.proj``.

    Args:
        hidden_states: Input forwarded to ``self.fused_fc``.
        lora_layer_params: Must be ``None``; LoRA is rejected by this layer.

    Returns:
        Whatever ``self.proj`` returns for the activated (and possibly
        quantized) intermediate.

    Raises:
        AssertionError: If ``lora_layer_params`` is not ``None``.
        NotImplementedError: If ``self.hidden_act`` is neither 'silu'
            nor 'gelu'.
    """
    assert lora_layer_params is None, f"lora is not supported on {self.__class__.__name__} now"

    # The fused projection runs before the activation is validated, so an
    # unsupported activation is only reported after this call.
    gated = self.fused_fc(hidden_states)

    # Map the configured activation onto its gated ACT2FN counterpart.
    gated_act_key = {'silu': 'swiglu', 'gelu': 'geglu'}.get(self.hidden_act)
    if gated_act_key is None:
        raise NotImplementedError(
            f"Activation {self.hidden_act} not yet implemented for {self.__class__.__name__}."
        )
    gated = ACT2FN[gated_act_key](gated)

    if self.quant_mode.has_fp8_rowwise():
        # Per-token quantization yields a tuple: the quantized tensor and
        # the per-token scaling factors.
        clamp_param = self.clamp_val
        if clamp_param is None:
            clamp_bounds = None
        else:
            clamp_bounds = clamp_param.value
        gated = quantize_fp8_per_token(gated, clamp_bounds)

    return self.proj(gated)
| 1830 | |
| 1831 | |
| 1832 | class Fp8RowwiseAttention(Module): |
nothing calls this directly
no test coverage detected