MCPcopy
hub / github.com/OpenPPL/ppq / insert_quantize_node

Method insert_quantize_node

ppq/parser/onnxruntime_exporter.py:81–145  ·  view source on GitHub ↗

Insert a Quantize Node on the given variable, according to the given TensorQuantizationConfig. There are two basic types of Quantize Node: QuantizeLinear and QuantizeFloating.

(
        self, graph: BaseGraph, 
        var: Variable, config: TensorQuantizationConfig, 
        op: Operation)

Source from the content-addressed store, hash-verified

79 return offset_dtype, value_dtype
80
81 def insert_quantize_node(
82 self, graph: BaseGraph,
83 var: Variable, config: TensorQuantizationConfig,
84 op: Operation) -> Operation:
85 """
86 Insert a Quantize Node on given variable, according to given TensorQuantizationConfig.
87 There is two basic type of Quantize Node: QuantizeLinear and QuantizeFloating.
88 """
89 if config.policy.has_property(QuantizationProperty.LINEAR):
90 # Following code will export Linear Quantization Config
91 # That is for FP32 -> INT
92 offset_dtype, value_type = self.infer_qtype(config)
93 scale = convert_any_to_torch_tensor(config.scale.clone(), dtype=torch.float32)
94 offset = ppq_tensor_round(config.offset.clone()).type(offset_dtype)
95
96 created = graph.create_operation(op_type='QuantizeLinear', attributes={})
97 if config.policy.has_property(QuantizationProperty.PER_CHANNEL):
98 created.attributes['axis'] = config.channel_axis
99 else: created.attributes['axis'] = None
100
101 if var in op.inputs: graph.insert_op_before(A=created, B=op, input_idx=op.inputs.index(var))
102 elif var in op.outputs: graph.insert_op_after(A=created, B=op, output_idx=op.outputs.index(var))
103 else: raise ValueError(f'Unexpected Error in Exporting Op {op.name}({op.type}).')
104
105 graph.create_variable(name=None, value=scale, is_parameter=True, dest_ops=[created])
106 graph.create_variable(name=None, value=offset, is_parameter=True, dest_ops=[created])
107
108 created.outputs[0].dtype = value_type
109 created.outputs[0].shape = var.shape
110 created.inputs[0].shape = var.shape
111 return created
112
113 elif config.policy.has_property(QuantizationProperty.FLOATING):
114 # Following code will export Linear Quantization Config
115 # That is for FP32 -> FP8
116 scale = convert_any_to_torch_tensor(config.scale.clone(), dtype=torch.float32)
117 offset = convert_any_to_torch_tensor(config.offset.clone(), dtype=torch.float32)
118
119 created = graph.create_operation(
120 op_type='QuantizeFloating',
121 attributes={
122 'min': config.quant_min,
123 'max': config.quant_max,
124 'exponent': config.exponent_bits,
125 'mantissa': config.mantissa_bits})
126
127 if config.policy.has_property(QuantizationProperty.PER_CHANNEL):
128 created.attributes['axis'] = config.channel_axis
129 else: created.attributes['axis'] = None
130
131 if var in op.inputs: graph.insert_op_before(A=created, B=op, input_idx=op.inputs.index(var))
132 elif var in op.outputs: graph.insert_op_after(A=created, B=op, output_idx=op.outputs.index(var))
133 else: raise ValueError(f'Unexpected Error in Exporting Op {op.name}({op.type}).')
134
135 graph.create_variable(name=None, value=scale, is_parameter=True, dest_ops=[created])
136 graph.create_variable(name=None, value=offset, is_parameter=True, dest_ops=[created])
137
138 created.outputs[0].shape = var.shape

Callers 2

remove_activation_opsMethod · 0.95
convert_operationMethod · 0.95

Calls 9

infer_qtypeMethod · 0.95
ppq_tensor_roundFunction · 0.90
has_propertyMethod · 0.80
typeMethod · 0.80
create_operationMethod · 0.80
insert_op_beforeMethod · 0.80
insert_op_afterMethod · 0.80
create_variableMethod · 0.80

Tested by

no test coverage detected