(
self,
in_features,
out_features,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
quant_mode=QuantMode(0),
)
| 635 | class Fp8RowwiseRowLinear(RowLinear): |
| 636 | |
def __init__(
    self,
    in_features,
    out_features,
    bias=True,
    dtype=None,
    tp_group=None,
    tp_size=1,
    quant_mode=QuantMode(0),
):
    """Set up the FP8 row-wise quantized parameters of this layer.

    The first six arguments are forwarded unchanged to the parent
    constructor. Afterwards ``self.weight`` is assigned an ``"fp8"``
    Parameter of shape ``(self.out_features, self.in_features)`` and
    ``self.per_channel_scale`` a ``"float32"`` Parameter of shape
    ``(self.out_features,)``.

    Args:
        in_features: Forwarded to the parent constructor.
        out_features: Forwarded to the parent constructor.
        bias: Forwarded to the parent constructor.
        dtype: Forwarded to the parent constructor. It is not used for
            the weight created here, which is always ``"fp8"``.
        tp_group: Forwarded to the parent constructor.
        tp_size: Forwarded to the parent constructor.
        quant_mode: Quantization mode; ``has_fp8_rowwise()`` must be
            true. Stored on ``self.quant_mode``.

    Raises:
        ValueError: If ``quant_mode.has_fp8_rowwise()`` is false.
    """
    super().__init__(in_features,
                     out_features,
                     bias=bias,
                     dtype=dtype,
                     tp_group=tp_group,
                     tp_size=tp_size)
    if not quant_mode.has_fp8_rowwise():
        raise ValueError(
            "Fp8 Rowwise Linear has to have act+weight quantization mode set"
        )

    # Past the guard, FP8 row-wise mode is known to be enabled, so the fp8
    # weight and its scale are created unconditionally.
    # NOTE(review): self.out_features / self.in_features are read from the
    # instance, presumably set by the parent constructor - they are not
    # necessarily equal to the arguments; confirm against RowLinear.
    self.weight = Parameter(shape=(self.out_features, self.in_features),
                            dtype="fp8")
    self.per_channel_scale = Parameter(shape=(self.out_features, ),
                                       dtype="float32")

    self.quant_mode = quant_mode
    # NOTE(review): the "externel" spelling is kept as-is; the attribute
    # name may be looked up outside this method.
    self.tllm_to_externel_key_dict = {"weight": ["weight", "weight_scale"]}
| 669 | |
| 670 | def forward(self, x, lora_runtime_params=None, all_reduce_params=None): |
| 671 | assert lora_runtime_params is None, "lora is not supported on SmoothQuantRowLinear now" |
nothing calls this directly
no test coverage detected