(ctx, tensor: torch.Tensor, scales: torch.Tensor,
offsets: torch.Tensor, quant_min: int, quant_max: int,
rounding: RoundingPolicy)
| 20 | """ |
@staticmethod
def forward(ctx, tensor: torch.Tensor, scales: torch.Tensor,
            offsets: torch.Tensor, quant_min: int, quant_max: int,
            rounding: RoundingPolicy) -> torch.Tensor:
    """Quantize ``tensor`` onto a linear grid and map it back to real values.

    ``scales`` and ``offsets`` are first moved to the device of ``tensor``.
    The computation is then dispatched to one of two implementations:

    * the custom CUDA kernel ``CUDA.LinearQuantize_T`` when
      ``PPQ_CONFIG.USING_CUDA_KERNEL`` is truthy and ``tensor`` lives on a
      CUDA device;
    * otherwise a PyTorch implementation computing
      ``(clamp(round(tensor / scales) + offsets, quant_min, quant_max)
      - offsets) * scales``, where ``round`` is ``ppq_tensor_round`` with
      the given ``rounding`` policy.

    Args:
        ctx: Autograd context object; not used by this method.
        tensor: Values to be quantized.
        scales: Quantization step size(s).
        offsets: Quantization offset(s) added after rounding.
        quant_min: Lower clamp bound applied in the quantized domain.
        quant_max: Upper clamp bound applied in the quantized domain.
        rounding: Rounding policy; its ``.value`` is what gets passed to
            the CUDA kernel.

    Returns:
        The quantize-dequantized tensor.
    """
    device = tensor.device
    scales = scales.to(device)
    offsets = offsets.to(device)

    if PPQ_CONFIG.USING_CUDA_KERNEL and tensor.is_cuda:
        # Quantization function, pure CUDA implementation.
        # Imported lazily so the CUDA extension is only touched on this path.
        from ppq.core import CUDA

        return CUDA.LinearQuantize_T(
            tensor=tensor,
            scales=scales,
            offsets=offsets,
            minimum=quant_min,
            maximum=quant_max,
            rounding=rounding.value,
        )

    # Quantization function, PyTorch implementation.
    levels = ppq_tensor_round(tensor / scales, rounding) + offsets
    levels = torch.clamp(levels, quant_min, quant_max)
    return (levels - offsets) * scales
| 47 | |
| 48 | @ staticmethod |
| 49 | def backward(ctx, dy: torch.Tensor): |
nothing calls this directly
no test coverage detected