Method to_dict

tensorrt_llm/quantization/mode.py:437–471 · view source on GitHub ↗

(self)

Source from the content-addressed store, hash-verified

435	return quant_mode
436
def to_dict(self):
    """Collect this mode's quantization settings into a plain dict.

    Every entry except ``weight_only_precision`` holds whatever the
    corresponding query method on ``self`` returns. ``weight_only_precision``
    is the string ``'int8'`` when ``is_int8_weight_only()`` is truthy and
    ``'int4'`` otherwise.

    Returns:
        dict: the settings, keyed by option name, in the fixed order in
        which they are inserted below.
    """
    settings = {}

    # Activation/weight quantization and scaling granularity.
    settings['use_smooth_quant'] = self.has_act_and_weight_quant()
    settings['per_channel'] = self.has_per_channel_scaling()
    settings['per_token'] = self.has_per_token_dynamic_scaling()
    settings['per_group'] = self.has_per_group_scaling()
    settings['int8_kv_cache'] = self.has_int8_kv_cache()

    # FP8 / FP4 family switches.
    settings['enable_fp8'] = self.has_fp8_qdq()
    settings['enable_fp8_rowwise'] = self.has_fp8_rowwise()
    settings['enable_fp8_block_scales'] = self.has_fp8_block_scales()
    settings['enable_nvfp4'] = self.has_nvfp4()
    settings['enable_w4a8_nvfp4_fp8'] = self.has_w4a8_nvfp4_fp8()
    settings['enable_w4a8_mxfp4_fp8'] = self.has_w4a8_mxfp4_fp8()
    settings['enable_w4a8_mxfp4_mxfp8'] = self.has_w4a8_mxfp4_mxfp8()
    settings['enable_w4a16_mxfp4'] = self.has_w4a16_mxfp4()
    settings['fp8_kv_cache'] = self.has_fp8_kv_cache()

    # Weight-only settings.
    settings['use_weight_only'] = self.is_weight_only()
    # NOTE: 'int4' is reported whenever the int8 weight-only check is falsy,
    # independently of the value stored under 'use_weight_only'.
    if self.is_int8_weight_only():
        precision = 'int8'
    else:
        precision = 'int4'
    settings['weight_only_precision'] = precision

    return settings
472
473
474	class GroupwiseQuantAlgo:

quantize_and_export — Function · 0.45

has_act_and_weight_quant — Method · 0.95

has_per_channel_scaling — Method · 0.95

has_per_token_dynamic_scaling — Method · 0.95

has_per_group_scaling — Method · 0.95

has_int8_kv_cache — Method · 0.95

has_fp8_qdq — Method · 0.95

has_fp8_rowwise — Method · 0.95

has_fp8_block_scales — Method · 0.95

has_nvfp4 — Method · 0.95

has_w4a8_nvfp4_fp8 — Method · 0.95

has_w4a8_mxfp4_fp8 — Method · 0.95

has_w4a8_mxfp4_mxfp8 — Method · 0.95

no test coverage detected