(self)
| 435 | return quant_mode |
| 436 | |
def to_dict(self):
    """Summarise this quantization mode as a flat dictionary.

    Every entry except ``'weight_only_precision'`` holds the value returned
    by the matching predicate method on ``self``. The final entry,
    ``'weight_only_precision'``, is ``'int8'`` when
    ``is_int8_weight_only()`` is truthy and ``'int4'`` in every other case.

    Returns:
        dict: the settings, in the key order listed below.
    """
    # (output key, name of the zero-argument predicate on ``self``).
    # The order here is both the key order of the result and the order in
    # which the predicates are invoked.
    flag_sources = (
        ('use_smooth_quant', 'has_act_and_weight_quant'),
        ('per_channel', 'has_per_channel_scaling'),
        ('per_token', 'has_per_token_dynamic_scaling'),
        ('per_group', 'has_per_group_scaling'),
        ('int8_kv_cache', 'has_int8_kv_cache'),
        ('enable_fp8', 'has_fp8_qdq'),
        ('enable_fp8_rowwise', 'has_fp8_rowwise'),
        ('enable_fp8_block_scales', 'has_fp8_block_scales'),
        ('enable_nvfp4', 'has_nvfp4'),
        ('enable_w4a8_nvfp4_fp8', 'has_w4a8_nvfp4_fp8'),
        ('enable_w4a8_mxfp4_fp8', 'has_w4a8_mxfp4_fp8'),
        ('enable_w4a8_mxfp4_mxfp8', 'has_w4a8_mxfp4_mxfp8'),
        ('enable_w4a16_mxfp4', 'has_w4a16_mxfp4'),
        ('fp8_kv_cache', 'has_fp8_kv_cache'),
        ('use_weight_only', 'is_weight_only'),
    )
    settings = {
        key: getattr(self, predicate)()
        for key, predicate in flag_sources
    }

    # Anything that is not int8 weight-only is reported as 'int4'; there is
    # no separate "not applicable" value.
    if self.is_int8_weight_only():
        settings['weight_only_precision'] = 'int8'
    else:
        settings['weight_only_precision'] = 'int4'
    return settings
| 472 | |
| 473 | |
| 474 | class GroupwiseQuantAlgo: |
no test coverage detected