(self, tensor: torch.Tensor, config: TensorQuantizationConfig)
| 386 | pass # do nothing here. |
| 387 | |
def __call__(self, tensor: torch.Tensor, config: TensorQuantizationConfig) -> torch.Tensor:
    """Quantize-dequantize ``tensor`` according to ``config``.

    Dispatch:
      * CUDA tensor and ``PPQ_CONFIG.USING_CUDA_KERNEL`` is truthy:
          - LINEAR + PER_CHANNEL -> ``CuLSQ_LC.apply``
          - LINEAR + PER_TENSOR  -> ``CuLSQ_LT.apply``
          - FLOATING             -> ``PPQuantFunction``
      * Otherwise the round trip is computed inline with torch ops.

    Args:
        tensor: Value to be quantized.
        config: Supplies ``policy``, ``scale``, ``offset``, ``channel_axis``,
            ``quant_min``, ``quant_max`` and ``rounding``.

    Returns:
        The quantize-dequantized tensor.

    Raises:
        ValueError: The kernel path was selected but the policy matches none
            of the combinations listed above. (Previously this case fell off
            the end of the method and returned ``None``.)
    """
    if tensor.is_cuda and PPQ_CONFIG.USING_CUDA_KERNEL:
        policy = config.policy
        if policy.has_property(QuantizationProperty.LINEAR):
            if policy.has_property(QuantizationProperty.PER_CHANNEL):
                return CuLSQ_LC.apply(
                    tensor, config.scale, config.offset, config.channel_axis,
                    config.quant_min, config.quant_max, config.rounding)
            if policy.has_property(QuantizationProperty.PER_TENSOR):
                return CuLSQ_LT.apply(
                    tensor, config.scale, config.offset,
                    config.quant_min, config.quant_max, config.rounding)
        elif policy.has_property(QuantizationProperty.FLOATING):
            # For floating quantization, scale is not trainable.
            return PPQuantFunction(tensor=tensor, config=config)

        # Fail loudly instead of silently returning None to the caller.
        raise ValueError(
            'Unsupported quantization policy for the CUDA kernel path: expected '
            'LINEAR with PER_CHANNEL or PER_TENSOR, or FLOATING.')

    # Fallback path (CPU tensor, or CUDA kernels disabled).
    # NOTE(review): this path never checks for FLOATING, so a floating policy
    # reaching here is processed with the linear formula below - confirm that
    # this is intended.
    scale, offset = config.scale, config.offset

    if self.is_scale_trainable:
        scale = scale.abs()
        grad_scale = 1 / (tensor.numel() * config.quant_max) ** 0.5
        # Forward value is still ``scale``; only the non-detached term carries
        # gradient, so the gradient w.r.t. scale is multiplied by grad_scale.
        scale = scale * grad_scale + (scale - scale * grad_scale).detach()

    if config.policy.has_property(QuantizationProperty.PER_CHANNEL):
        # Reshape to [1, ..., -1, ..., 1] so scale/offset broadcast along channel_axis.
        shape = [1 if axis != config.channel_axis else -1 for axis in range(tensor.ndim)]
        scale = scale.view(shape)
        offset = offset.view(shape)

    # offset is detached in both places, so no gradient reaches it from here.
    quantized = ppq_tensor_round((tensor / scale), config.rounding) + offset.detach()
    quantized = torch.clamp(quantized, config.quant_min, config.quant_max)
    return (quantized - offset.detach()) * scale
| 422 |
nothing calls this directly
no test coverage detected