PPQ 核心量化函数,没啥好说的了吧,这个玩意只做 quant 不做 dequant
(tensor: torch.Tensor, config: TensorQuantizationConfig)
| 216 | config.quant_min, config.quant_max, config.rounding) |
| 217 | |
def PPQLinearQuant_toInt(tensor: torch.Tensor, config: TensorQuantizationConfig) -> torch.Tensor:
    """Linearly quantize ``tensor`` and return the result as an integer tensor.

    Core PPQ quantization routine. It only quantizes (divide by scale, round,
    add offset, clamp to ``[quant_min, quant_max]``); it never dequantizes.

    Args:
        tensor: Values to quantize.
        config: Quantization config. This function reads ``policy``, ``scale``,
            ``offset``, ``rounding``, ``quant_min``, ``quant_max`` and
            ``num_of_bits``, plus ``channel_axis`` for per-channel policies.

    Returns:
        The quantized tensor cast to ``torch.int8`` (8-bit symmetrical),
        ``torch.uint8`` (8-bit asymmetrical) or ``torch.int32`` (more than
        8 bits).

    Raises:
        ValueError: If the policy is not LINEAR, or if an 8-bit policy is
            neither SYMMETRICAL nor ASYMMETRICAL (previously this case fell
            through and silently returned ``None``).
        Exception: If ``num_of_bits`` is below 8.
    """
    policy = config.policy
    if not policy.has_property(QuantizationProperty.LINEAR):
        raise ValueError('Critical Quantization Error! Non-linear config detected.')

    if policy.has_property(QuantizationProperty.PER_CHANNEL):
        # View scale/offset with -1 on the channel axis and 1 elsewhere so
        # they broadcast against ``tensor``.
        shape = [-1 if axis == config.channel_axis else 1 for axis in range(tensor.ndim)]
        scale, offset = config.scale.view(shape), config.offset.view(shape)
        tensor = ppq_tensor_round((tensor / scale), config.rounding) + offset
        tensor = torch.clamp(tensor, config.quant_min, config.quant_max)
    elif policy.has_property(QuantizationProperty.PER_TENSOR):
        tensor = ppq_tensor_round((tensor / config.scale), config.rounding) + config.offset
        tensor = torch.clamp(tensor, config.quant_min, config.quant_max)
    # NOTE(review): a policy that is neither PER_CHANNEL nor PER_TENSOR reaches
    # the cast below without being quantized -- confirm whether that should raise.

    if config.num_of_bits == 8:
        if policy.has_property(QuantizationProperty.SYMMETRICAL):
            return tensor.type(dtype=torch.int8)
        if policy.has_property(QuantizationProperty.ASYMMETRICAL):
            return tensor.type(dtype=torch.uint8)
        # Fail loudly instead of implicitly returning None.
        raise ValueError(
            'Critical Quantization Error! 8-bit config is neither symmetrical nor asymmetrical.')
    if config.num_of_bits > 8:
        return tensor.type(dtype=torch.int32)
    raise Exception('Do not konw how to convert value into int. num of bits is unexpected.')
no test coverage detected