Insert a Quantize Node on the given variable, according to the given TensorQuantizationConfig. There are two basic types of Quantize Node: QuantizeLinear and QuantizeFloating.
(
self, graph: BaseGraph,
var: Variable, config: TensorQuantizationConfig,
op: Operation)
| 79 | return offset_dtype, value_dtype |
| 80 | |
| 81 | def insert_quantize_node( |
| 82 | self, graph: BaseGraph, |
| 83 | var: Variable, config: TensorQuantizationConfig, |
| 84 | op: Operation) -> Operation: |
| 85 | """ |
| 86 | Insert a Quantize Node on given variable, according to given TensorQuantizationConfig. |
| 87 | There is two basic type of Quantize Node: QuantizeLinear and QuantizeFloating. |
| 88 | """ |
| 89 | if config.policy.has_property(QuantizationProperty.LINEAR): |
| 90 | # Following code will export Linear Quantization Config |
| 91 | # That is for FP32 -> INT |
| 92 | offset_dtype, value_type = self.infer_qtype(config) |
| 93 | scale = convert_any_to_torch_tensor(config.scale.clone(), dtype=torch.float32) |
| 94 | offset = ppq_tensor_round(config.offset.clone()).type(offset_dtype) |
| 95 | |
| 96 | created = graph.create_operation(op_type='QuantizeLinear', attributes={}) |
| 97 | if config.policy.has_property(QuantizationProperty.PER_CHANNEL): |
| 98 | created.attributes['axis'] = config.channel_axis |
| 99 | else: created.attributes['axis'] = None |
| 100 | |
| 101 | if var in op.inputs: graph.insert_op_before(A=created, B=op, input_idx=op.inputs.index(var)) |
| 102 | elif var in op.outputs: graph.insert_op_after(A=created, B=op, output_idx=op.outputs.index(var)) |
| 103 | else: raise ValueError(f'Unexpected Error in Exporting Op {op.name}({op.type}).') |
| 104 | |
| 105 | graph.create_variable(name=None, value=scale, is_parameter=True, dest_ops=[created]) |
| 106 | graph.create_variable(name=None, value=offset, is_parameter=True, dest_ops=[created]) |
| 107 | |
| 108 | created.outputs[0].dtype = value_type |
| 109 | created.outputs[0].shape = var.shape |
| 110 | created.inputs[0].shape = var.shape |
| 111 | return created |
| 112 | |
| 113 | elif config.policy.has_property(QuantizationProperty.FLOATING): |
| 114 | # Following code will export Linear Quantization Config |
| 115 | # That is for FP32 -> FP8 |
| 116 | scale = convert_any_to_torch_tensor(config.scale.clone(), dtype=torch.float32) |
| 117 | offset = convert_any_to_torch_tensor(config.offset.clone(), dtype=torch.float32) |
| 118 | |
| 119 | created = graph.create_operation( |
| 120 | op_type='QuantizeFloating', |
| 121 | attributes={ |
| 122 | 'min': config.quant_min, |
| 123 | 'max': config.quant_max, |
| 124 | 'exponent': config.exponent_bits, |
| 125 | 'mantissa': config.mantissa_bits}) |
| 126 | |
| 127 | if config.policy.has_property(QuantizationProperty.PER_CHANNEL): |
| 128 | created.attributes['axis'] = config.channel_axis |
| 129 | else: created.attributes['axis'] = None |
| 130 | |
| 131 | if var in op.inputs: graph.insert_op_before(A=created, B=op, input_idx=op.inputs.index(var)) |
| 132 | elif var in op.outputs: graph.insert_op_after(A=created, B=op, output_idx=op.outputs.index(var)) |
| 133 | else: raise ValueError(f'Unexpected Error in Exporting Op {op.name}({op.type}).') |
| 134 | |
| 135 | graph.create_variable(name=None, value=scale, is_parameter=True, dest_ops=[created]) |
| 136 | graph.create_variable(name=None, value=offset, is_parameter=True, dest_ops=[created]) |
| 137 | |
| 138 | created.outputs[0].shape = var.shape |
no test coverage detected