| 767 | |
| 768 | |
def dequantize(input: Tensor,
               scale_factor: Tensor,
               axis: int = -1,
               output_type: Union[str, trt.DataType] = 'float16') -> Tensor:
    '''
    Add a dequantize layer to the default TensorRT network.

    Parameters:
        input : Tensor
            The tensor to dequantize; its underlying TensorRT tensor is
            passed as the first argument of the layer.

        scale_factor : Tensor
            The tensor holding the scale; its underlying TensorRT tensor is
            passed as the second argument of the layer.

        axis : int
            Value assigned to the layer's 'axis' attribute (default -1).
            NOTE(review): presumably the axis along which per-channel scales
            apply -- confirm against the TensorRT layer documentation.

        output_type : Union[str, trt.DataType]
            Data type requested for the layer. A string is first converted
            with str_dtype_to_trt; any other value is forwarded unchanged.

    Returns:
        The tensor created from the first output of the dequantize layer.
    '''
    # Normalize a string dtype name to its TensorRT counterpart.
    trt_output_type = (str_dtype_to_trt(output_type) if isinstance(
        output_type, str) else output_type)

    dq_layer = default_trtnet().add_dequantize(input.trt_tensor,
                                               scale_factor.trt_tensor,
                                               trt_output_type)
    dq_layer.axis = axis

    # The layer precision is only set explicitly when the network is not
    # strongly typed.
    if not default_net().strongly_typed:
        dq_layer.precision = input.dtype

    return _create_tensor(dq_layer.get_output(0), dq_layer)
| 788 | |
| 789 | |
| 790 | def quantize_per_token( |