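PPQ ParameterQuantizePass completes the quantization of positive parameters. By default, all parameters with initial state will be quantized during this optimization, all non-parameter tensors will be excluded from this pass by temporary dequantization. Then, operation observers will be established automatically to record necessary statistics; observers are also responsible for rendering quantization configuration (computing scale and offset).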
| 154 | |
| 155 | |
class ParameterQuantizePass(QuantizationOptimizationPass):
    """PPQ ParameterQuantizePass completes the quantization of positive
    parameters. By default, all parameters with initial state will be quantized
    during this optimization, all non-parameter tensors will be excluded from
    this pass by temporary dequantization.

    Then, operation observers will be established automatically to record
    necessary statistics, observers are also responsible for rendering
    quantization configuration (computing scale and offset).

    This pass needs no data, however it uses fake data to finish a dummy
    forward process.
    see also: TorchExecutor.dummy_forward function

    Args:
        method: Optional observer algorithm name. When it is not None it
            overwrites ``observer_algorithm`` on every parameter config
            visited by ``optimize``; None leaves the existing setting alone.
    """

    def __init__(self, method: str = None):
        self._method = method
        super().__init__(name='PPQ Parameter Quantization Pass')

    def optimize(
        self,
        graph: BaseGraph,
        dataloader: Iterable,
        executor: TorchExecutor,
        **kwargs
    ) -> None:
        """Attach an observer to every quantable operation and run a dummy forward.

        Args:
            graph: Graph whose ``operations`` mapping is walked.
            dataloader: Not read by the code shown here.
            executor: Must be a ``TorchExecutor``; its ``_graph`` supplies the
                operations handed to the observers and its ``dummy_forward``
                is invoked with the observer hooks.
            **kwargs: Not read by the code shown here.

        Raises:
            AssertionError: If ``executor`` is not a ``TorchExecutor``.
        """
        # Check the executor type up front: the loop below dereferences
        # executor._graph and flips non-parameter config states to FP32, so a
        # late failure would leave the graph with those states un-restored.
        assert isinstance(executor, TorchExecutor), \
            'ParameterQuantizePass Only support TorchExecutor now.'

        # build observer and hook for each quantable operation
        hooks, observers, state_records = {}, {}, {}
        for op_name, operation in graph.operations.items():
            if not isinstance(operation, QuantableOperation):
                continue

            for config, var in operation.config_with_variable:
                if not var.is_parameter:
                    # deactivate non-parameter variable quantization just for
                    # now; the previous state is remembered for the restore
                    # step after the dummy forward.
                    state_records[config] = config.state
                    config.state = QuantizationStates.FP32
                elif self._method is not None:
                    # override quantizer's setting if necessary
                    config.observer_algorithm = self._method

            # The observer is built on the executor's copy of the operation,
            # looked up by name, with input/output monitoring switched off.
            observer = OperationObserver(
                operation=executor._graph.operations[op_name],
                monitor_outputs=False, monitor_inputs=False)
            observers[op_name] = observer
            hooks[op_name] = observer.hook

        # dummy forward, quant all parameter.
        executor.dummy_forward(hooks=hooks)

        # render quantization config, restore non-parameter quantization state
        for op_name, operation in graph.operations.items():
            if not isinstance(operation, QuantableOperation):
                continue

            for cfg, var in operation.config_with_variable:
                if not var.is_parameter:
                    cfg.state = state_records[cfg]

            observer = observers[op_name]
            assert isinstance(observer, OperationObserver)
            # NOTE(review): the visible listing stops here; the step that
            # actually renders the observer's quantization config presumably
            # follows in the full source - confirm before relying on this.
no outgoing calls
no test coverage detected