(self, operation: Operation)
| 22 | # 我建议你针对这类情况进行回应。或者,在探测到算子类型并非可量化类型后进行报错 |
| 23 | # ------------------------------------------------------------ |
def init_quantize_config(self, operation: Operation) -> OperationQuantizationConfig:
    """Create the quantization config for a Conv operation.

    Only the Conv inputs (input and weight) stay quantized. Every output
    is switched to FP32, and so is the bias when the operation has three
    inputs. The default precision is 4 bits; operations dispatched to
    ``TargetPlatform.ACADEMIC_INT8`` get their first two inputs widened
    to 8 bits.

    Args:
        operation: The operation to configure. Only ``type == 'Conv'``
            is supported.

    Returns:
        The config produced by ``self.create_default_quant_config`` with
        the adjustments described above applied.

    Raises:
        TypeError: If ``operation.type`` is not ``'Conv'``.
    """
    # Reject non-quantizable operator types up front instead of silently
    # producing a config for them.
    if operation.type != 'Conv':
        raise TypeError(f'Unsupported Op Type: {operation.type}')

    # Default config for Conv. The quant range is the signed 4-bit range
    # [-8, 7] so that it agrees with num_of_bits, mirroring how the INT8
    # branch below pairs 8 bits with [-128, 127]. (The previous values
    # 15 / -16 span 32 levels, i.e. a 5-bit range.)
    config = self.create_default_quant_config(
        op=operation,
        num_of_bits=4,
        quant_max=7,
        quant_min=-8,
        observer_algorithm='percentile',
        policy=QuantizationPolicy(
            QuantizationProperty.PER_TENSOR +
            QuantizationProperty.LINEAR +
            QuantizationProperty.SYMMETRICAL),
        rounding=RoundingPolicy.ROUND_HALF_EVEN)

    # Disable quantization on every output: mark them as FP32.
    for output_config in config.output_quantization_config:
        output_config.state = QuantizationStates.FP32

    # Disable bias quantization: with three inputs the last one is the
    # bias, which is kept in FP32.
    if operation.num_of_input == 3:
        config.input_quantization_config[-1].state = QuantizationStates.FP32

    # Operations dispatched to the INT8 platform use 8-bit quantization
    # for their first two inputs (input and weight).
    if operation.platform == TargetPlatform.ACADEMIC_INT8:
        print(f'{operation.name} has been dispatched to INT8')
        for index in (0, 1):
            input_config = config.input_quantization_config[index]
            input_config.num_of_bits = 8
            input_config.quant_max = 127
            input_config.quant_min = -128

    return config
| 69 | |
| 70 | # ------------------------------------------------------------ |
| 71 | # 当前量化器进行量化的算子都将被发往一个指定的目标平台 |
nothing calls this directly
no test coverage detected