MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / to_dict

Method to_dict

tensorrt_llm/quantization/mode.py:437–471  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

435 return quant_mode
436
def to_dict(self):
    """Return the quantization flags of this mode as a plain dict.

    Every entry except ``'weight_only_precision'`` is the return value of
    the matching predicate method on ``self``. Keys are inserted in a
    fixed order, ending with ``'weight_only_precision'``.

    Returns:
        dict: flag name -> predicate result, plus
        ``'weight_only_precision'`` which is ``'int8'`` when
        ``is_int8_weight_only()`` is true and ``'int4'`` otherwise.
    """
    flags = dict(
        use_smooth_quant=self.has_act_and_weight_quant(),
        per_channel=self.has_per_channel_scaling(),
        per_token=self.has_per_token_dynamic_scaling(),
        per_group=self.has_per_group_scaling(),
        int8_kv_cache=self.has_int8_kv_cache(),
        enable_fp8=self.has_fp8_qdq(),
        enable_fp8_rowwise=self.has_fp8_rowwise(),
        enable_fp8_block_scales=self.has_fp8_block_scales(),
        enable_nvfp4=self.has_nvfp4(),
        enable_w4a8_nvfp4_fp8=self.has_w4a8_nvfp4_fp8(),
        enable_w4a8_mxfp4_fp8=self.has_w4a8_mxfp4_fp8(),
        enable_w4a8_mxfp4_mxfp8=self.has_w4a8_mxfp4_mxfp8(),
        enable_w4a16_mxfp4=self.has_w4a16_mxfp4(),
        fp8_kv_cache=self.has_fp8_kv_cache(),
        use_weight_only=self.is_weight_only(),
    )
    # The precision string does not consult is_weight_only(): 'int4' is
    # reported whenever the int8 weight-only predicate is false.
    if self.is_int8_weight_only():
        flags['weight_only_precision'] = 'int8'
    else:
        flags['weight_only_precision'] = 'int4'
    return flags
472
473
474class GroupwiseQuantAlgo:

Callers 1

quantize_and_export · Function · 0.45

Calls 15

has_per_group_scaling · Method · 0.95
has_int8_kv_cache · Method · 0.95
has_fp8_qdq · Method · 0.95
has_fp8_rowwise · Method · 0.95
has_fp8_block_scales · Method · 0.95
has_nvfp4 · Method · 0.95
has_w4a8_nvfp4_fp8 · Method · 0.95
has_w4a8_mxfp4_fp8 · Method · 0.95
has_w4a8_mxfp4_mxfp8 · Method · 0.95

Tested by

no test coverage detected