| 237 | |
| 238 | @staticmethod |
| 239 | def from_description(quantize_weights=False, |
| 240 | quantize_activations=False, |
| 241 | per_token=False, |
| 242 | per_channel=False, |
| 243 | per_group=False, |
| 244 | use_int4_weights=False, |
| 245 | use_int8_kv_cache=False, |
| 246 | use_fp8_kv_cache=False, |
| 247 | use_fp8_qdq=False, |
| 248 | use_fp8_block_scales=False, |
| 249 | use_fp8_rowwise=False, |
| 250 | use_nvfp4=False, |
| 251 | use_w4a8_nvfp4_fp8=False, |
| 252 | use_w4a8_qserve=False, |
| 253 | use_w4a8_mxfp4_fp8=False, |
| 254 | use_w4a8_mxfp4_mxfp8=False, |
| 255 | use_w4a16_mxfp4=False): |
| 256 | |
def raise_error():
    """Raise ``ValueError`` describing the rejected argument combination.

    The message echoes every argument of the enclosing
    ``from_description`` call (read as closure variables) using the
    ``name=value`` f-string form, so the caller can see exactly which
    flags were passed.

    Raises:
        ValueError: Always.
    """
    # Keep this list in the same order as the enclosing signature so a
    # missing flag is easy to spot; use_w4a8_nvfp4_fp8 was previously
    # absent from the report.
    raise ValueError("Unsupported combination of QuantMode args: "
                     f"{quantize_weights=}, "
                     f"{quantize_activations=}, "
                     f"{per_token=}, "
                     f"{per_channel=}, "
                     f"{per_group=}, "
                     f"{use_int4_weights=}, "
                     f"{use_int8_kv_cache=}, "
                     f"{use_fp8_kv_cache=}, "
                     f"{use_fp8_qdq=}, "
                     f"{use_fp8_block_scales=}, "
                     f"{use_fp8_rowwise=}, "
                     f"{use_nvfp4=}, "
                     f"{use_w4a8_nvfp4_fp8=}, "
                     f"{use_w4a8_qserve=}, "
                     f"{use_w4a8_mxfp4_fp8=}, "
                     f"{use_w4a8_mxfp4_mxfp8=}, "
                     f"{use_w4a16_mxfp4=}")
| 275 | |
| 276 | # We must quantize weights when we quantize activations. |
| 277 | if quantize_activations and not quantize_weights: |
| 278 | raise_error() |
| 279 | |
| 280 | # If we set per_token or per_channel, we must quantize both weights and activations. |
| 281 | if (per_token or per_channel) and not (quantize_weights |
| 282 | and quantize_activations): |
| 283 | raise_error() |
| 284 | |
| 285 | mode = QuantMode(0) |
| 286 | |
| 287 | # Do we quantize the weights - if so, do we use INT4 or INT8? |
| 288 | if quantize_weights and use_int4_weights: |
| 289 | mode = mode | QuantMode.INT4_WEIGHTS |
| 290 | elif quantize_weights: |
| 291 | mode = mode | QuantMode.INT8_WEIGHTS |
| 292 | |
| 293 | # Do we quantize the activations? |
| 294 | if quantize_activations: |
| 295 | mode = mode | QuantMode.ACTIVATIONS |
| 296 | |