(q_bits, is_symmetric_quant, activations_ref, num_groups)
| 72 | |
| 73 | |
def run_float_quantize(q_bits, is_symmetric_quant, activations_ref, num_groups):
    """Quantize ``activations_ref`` group by group using plain PyTorch ops.

    Reference implementation, see
    https://pytorch.org/docs/stable/quantization-support.html

    Args:
        q_bits: Bit width, forwarded to ``get_q_props`` and ``get_scale_zero_point``.
        is_symmetric_quant: When falsy, the zero point is added to the scaled
            values before rounding; when truthy, no offset is applied.
        activations_ref: Tensor to quantize. It is reshaped to
            ``(num_groups, -1)`` and cast to float32 before any statistics are taken.
        num_groups: Number of rows the input is split into; max, min and
            abs-max are computed once per row.

    Returns:
        Tuple ``(data_i8, params)``. ``data_i8`` is the rounded and clamped
        result cast to int8. ``params`` has two columns: the reciprocal of the
        scale, then the zero point.
    """
    grouped = activations_ref.reshape(num_groups, -1).to(dtype=torch.float32)

    # Per-group statistics, each kept as a (num_groups, 1) column.
    group_max = torch.amax(grouped, dim=-1, keepdim=True)
    group_min = torch.amin(grouped, dim=-1, keepdim=True)
    group_abs_max = torch.amax(torch.abs(grouped), dim=-1, keepdim=True)

    _, q_max, q_min = get_q_props(q_bits)

    scale, zero_point = get_scale_zero_point(q_bits, is_symmetric_quant, group_max, group_min, group_abs_max)

    scaled = grouped * scale
    shifted = scaled if is_symmetric_quant else scaled + zero_point

    rounded = torch.round(shifted).to(dtype=torch.int32)

    # Clamp into [q_min, q_max]: raise to the lower bound, then cap at the upper bound.
    lower_bounded = torch.maximum(rounded, q_min.expand_as(rounded))
    clamped = torch.minimum(lower_bounded, q_max.expand_as(rounded))
    # NOTE(review): the int8 cast presumably relies on q_bits <= 8 — confirm against get_q_props.
    data_i8 = clamped.to(dtype=torch.int8)

    # One row per scale entry: [1 / scale, zero_point].
    inverse_scale_col = (1.0 / scale).reshape(-1, 1)
    zero_point_col = zero_point.reshape(-1, 1)
    params = torch.cat((inverse_scale_col, zero_point_col), dim=-1)

    return data_i8, params
| 105 | |
| 106 | |
| 107 | def run_float_dequantize(q_bits, is_symmetric_quant, data_i8, params, num_groups): |
no test coverage detected
searching dependent graphs…