Function dequantize

tensorrt_llm/quantization/functional.py:769–787 · view source on GitHub ↗

(input: Tensor,
               scale_factor: Tensor,
               axis: int = -1,
               output_type: Union[str, trt.DataType] = 'float16')

Source from the content-addressed store, hash-verified

767
768
def dequantize(input: Tensor,
               scale_factor: Tensor,
               axis: int = -1,
               output_type: Union[str, trt.DataType] = 'float16') -> Tensor:
    """Add a dequantize layer to the default network and return its output.

    Parameters:
        input: Tensor whose ``trt_tensor`` is passed to the dequantize layer
            as its data input.
        scale_factor: Tensor whose ``trt_tensor`` is passed to the layer as
            the scale.
        axis: Value assigned to the layer's ``axis`` attribute. Defaults
            to -1.
        output_type: Requested output data type, either a ``trt.DataType``
            or a dtype name; names are converted with ``str_dtype_to_trt``.
            Defaults to ``'float16'``.

    Returns:
        The tensor created from the layer's first output.
    """
    # Dtype names are resolved up front so the layer always receives a
    # TensorRT data type; non-string values are forwarded untouched.
    trt_output_type = (str_dtype_to_trt(output_type) if isinstance(
        output_type, str) else output_type)

    dequant_layer = default_trtnet().add_dequantize(input.trt_tensor,
                                                    scale_factor.trt_tensor,
                                                    trt_output_type)
    dequant_layer.axis = axis

    # The layer precision is pinned to the input dtype only when the network
    # is not strongly typed.
    if not default_net().strongly_typed:
        dequant_layer.precision = input.dtype

    return _create_tensor(dequant_layer.get_output(0), dequant_layer)
788
789
790	def quantize_per_token(

test_dequantizeMethod · 0.90

forwardMethod · 0.85

smooth_quant_gemmFunction · 0.85

weight_only_quant_matmulFunction · 0.85

weight_only_groupwise_quant_matmulFunction · 0.85

forwardMethod · 0.85

str_dtype_to_trtFunction · 0.85

default_trtnetFunction · 0.85

default_netFunction · 0.85

_create_tensorFunction · 0.85

get_outputMethod · 0.45

test_dequantizeMethod · 0.72