Method use_weight_only

tensorrt_llm/quantization/mode.py:356–362 · view source on GitHub ↗

(use_int4_weights=False, per_group=False)

Source from the content-addressed store, hash-verified

354
@staticmethod
def use_weight_only(use_int4_weights=False, per_group=False):
    """Build a weight-only quantization mode via ``QuantMode.from_description``.

    Weight quantization is switched on, while activation quantization,
    per-token scaling and per-channel scaling are all switched off. The two
    arguments are passed through to ``from_description`` untouched.

    Args:
        use_int4_weights: Forwarded as ``use_int4_weights``. Defaults to False.
        per_group: Forwarded as ``per_group``. Defaults to False.

    Returns:
        Whatever ``QuantMode.from_description`` returns for this description.
    """
    # Fixed part of the description: only the weights are quantized.
    description = dict(
        quantize_weights=True,
        quantize_activations=False,
        per_token=False,
        per_channel=False,
    )
    # Caller-controlled part, forwarded as-is.
    description["per_group"] = per_group
    description["use_int4_weights"] = use_int4_weights
    return QuantMode.from_description(**description)
363
364	@staticmethod
365	def from_quant_algo(

test_moe_w8a16Method · 0.80

test_moe_w4a16_groupwiseMethod · 0.80

test_moe_w4a8_groupwiseMethod · 0.80

test_from_descriptionMethod · 0.80

test_mixture_of_expertsMethod · 0.80

test_mlp_comparisonMethod · 0.80

test_mlp_lora_comparisonMethod · 0.80

__init__Method · 0.80

from_quant_algoMethod · 0.80

from_descriptionMethod · 0.80

test_moe_w8a16Method · 0.64

test_moe_w4a16_groupwiseMethod · 0.64

test_moe_w4a8_groupwiseMethod · 0.64

test_from_descriptionMethod · 0.64

test_mixture_of_expertsMethod · 0.64

test_mlp_comparisonMethod · 0.64

test_mlp_lora_comparisonMethod · 0.64