hub / github.com/OpenPPL/ppq / insert_quantize_node

Method insert_quantize_node

ppq/parser/onnxruntime_exporter.py:81–145 · view source on GitHub ↗

Insert a Quantize Node on the given variable, according to the given TensorQuantizationConfig. There are two basic types of Quantize Node: QuantizeLinear and QuantizeFloating.

(
        self, graph: BaseGraph, 
        var: Variable, config: TensorQuantizationConfig, 
        op: Operation)

Source from the content-addressed store, hash-verified

79	return offset_dtype, value_dtype
80
81	def insert_quantize_node(
82	self, graph: BaseGraph,
83	var: Variable, config: TensorQuantizationConfig,
84	op: Operation) -> Operation:
85	"""
86	Insert a Quantize Node on given variable, according to given TensorQuantizationConfig.
87	There is two basic type of Quantize Node: QuantizeLinear and QuantizeFloating.
88	"""
89	if config.policy.has_property(QuantizationProperty.LINEAR):
90	# Following code will export Linear Quantization Config
91	# That is for FP32 -> INT
92	offset_dtype, value_type = self.infer_qtype(config)
93	scale = convert_any_to_torch_tensor(config.scale.clone(), dtype=torch.float32)
94	offset = ppq_tensor_round(config.offset.clone()).type(offset_dtype)
95
96	created = graph.create_operation(op_type='QuantizeLinear', attributes={})
97	if config.policy.has_property(QuantizationProperty.PER_CHANNEL):
98	created.attributes['axis'] = config.channel_axis
99	else: created.attributes['axis'] = None
100
101	if var in op.inputs: graph.insert_op_before(A=created, B=op, input_idx=op.inputs.index(var))
102	elif var in op.outputs: graph.insert_op_after(A=created, B=op, output_idx=op.outputs.index(var))
103	else: raise ValueError(f'Unexpected Error in Exporting Op {op.name}({op.type}).')
104
105	graph.create_variable(name=None, value=scale, is_parameter=True, dest_ops=[created])
106	graph.create_variable(name=None, value=offset, is_parameter=True, dest_ops=[created])
107
108	created.outputs[0].dtype = value_type
109	created.outputs[0].shape = var.shape
110	created.inputs[0].shape = var.shape
111	return created
112
113	elif config.policy.has_property(QuantizationProperty.FLOATING):
114	# Following code will export Linear Quantization Config
115	# That is for FP32 -> FP8
116	scale = convert_any_to_torch_tensor(config.scale.clone(), dtype=torch.float32)
117	offset = convert_any_to_torch_tensor(config.offset.clone(), dtype=torch.float32)
118
119	created = graph.create_operation(
120	op_type='QuantizeFloating',
121	attributes={
122	'min': config.quant_min,
123	'max': config.quant_max,
124	'exponent': config.exponent_bits,
125	'mantissa': config.mantissa_bits})
126
127	if config.policy.has_property(QuantizationProperty.PER_CHANNEL):
128	created.attributes['axis'] = config.channel_axis
129	else: created.attributes['axis'] = None
130
131	if var in op.inputs: graph.insert_op_before(A=created, B=op, input_idx=op.inputs.index(var))
132	elif var in op.outputs: graph.insert_op_after(A=created, B=op, output_idx=op.outputs.index(var))
133	else: raise ValueError(f'Unexpected Error in Exporting Op {op.name}({op.type}).')
134
135	graph.create_variable(name=None, value=scale, is_parameter=True, dest_ops=[created])
136	graph.create_variable(name=None, value=offset, is_parameter=True, dest_ops=[created])
137
138	created.outputs[0].shape = var.shape

Callers 2

remove_activation_opsMethod · 0.95

convert_operationMethod · 0.95

Calls 9

infer_qtypeMethod · 0.95

convert_any_to_torch_tensorFunction · 0.90

ppq_tensor_roundFunction · 0.90

has_propertyMethod · 0.80

typeMethod · 0.80

create_operationMethod · 0.80

insert_op_beforeMethod · 0.80

insert_op_afterMethod · 0.80

create_variableMethod · 0.80

Tested by

no test coverage detected