MCPcopy
hub / github.com/OpenPPL/ppq / init_quantize_config

Method init_quantize_config

ppq/samples/Tutorial/dispatch.py:24–68  ·  view source on GitHub ↗
(self, operation: Operation)

Source from the content-addressed store, hash-verified

22 # 我建议你针对这类情况进行回应。或者,在探测到算子类型并非可量化类型后进行报错
23 # ------------------------------------------------------------
def init_quantize_config(self, operation: Operation) -> OperationQuantizationConfig:
    """Build the quantization config for a single operation.

    Only ``Conv`` operations are handled. The config starts out as a 4-bit,
    per-tensor, linear, symmetrical configuration and is then adjusted:
    every output config is switched to FP32, the last input config is
    switched to FP32 when the operation has three inputs (the bias, per the
    original author's note), and the first two input configs are widened to
    8 bits when the operation sits on ``TargetPlatform.ACADEMIC_INT8``.

    Args:
        operation: The operation to configure. Its ``type``,
            ``num_of_input``, ``platform`` and ``name`` attributes are read.

    Returns:
        The config created by ``self.create_default_quant_config`` after the
        adjustments described above.

    Raises:
        TypeError: If ``operation.type`` is not ``'Conv'``.
    """
    # Reject unsupported operator types up front so the Conv path below
    # reads straight through without an enclosing branch.
    if operation.type != 'Conv':
        raise TypeError(f'Unsupported Op Type: {operation.type}')

    # Default configuration for Conv: quantize the inputs with 4 bits.
    # A signed 4-bit integer spans [-8, 7]; the previous bounds [-16, 15]
    # describe a 5-bit range and contradicted num_of_bits = 4.
    config = self.create_default_quant_config(
        op=operation,
        num_of_bits=4,
        quant_max=7,
        quant_min=-8,
        observer_algorithm='percentile',
        policy=QuantizationPolicy(
            QuantizationProperty.PER_TENSOR +
            QuantizationProperty.LINEAR +
            QuantizationProperty.SYMMETRICAL),
        rounding=RoundingPolicy.ROUND_HALF_EVEN)

    # Disable quantization on every output by marking it FP32.
    for output_config in config.output_quantization_config:
        output_config.state = QuantizationStates.FP32

    # With three inputs the last one is the bias (per the original
    # author's comment); keep it in FP32 instead of quantizing it.
    if operation.num_of_input == 3:
        config.input_quantization_config[-1].state = QuantizationStates.FP32

    # Operations dispatched to the INT8 platform get 8-bit quantization on
    # their first two inputs (input and weight, per the original comments).
    if operation.platform == TargetPlatform.ACADEMIC_INT8:
        print(f'{operation.name} has been dispatched to INT8')
        for index in (0, 1):
            input_config = config.input_quantization_config[index]
            input_config.num_of_bits = 8
            input_config.quant_max = 127
            input_config.quant_min = -128

    return config
69
70 # ------------------------------------------------------------
71 # 当前量化器进行量化的算子都将被发往一个指定的目标平台

Callers

nothing calls this directly

Calls 2

QuantizationPolicyClass · 0.85

Tested by

no test coverage detected