Class UnquantizedTensor

utils/convert-ms-to-gguf-bitnet.py:781–808 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

779	return ans, scale
780
class UnquantizedTensor(Tensor):
    """Tensor whose payload is held directly in a NumPy array.

    An optional ``i2_scale`` value travels with the array; it is filled in by
    ``astype`` when the target dtype is ``np.uint8``.
    """

    def __init__(self, ndarray: NDArray, i2_scale: NDArray | None = None):
        """Wrap ``ndarray`` and record its data type.

        Args:
            ndarray: The backing array; must be an ``np.ndarray`` instance.
            i2_scale: Optional scale stored alongside the array.

        Raises:
            AssertionError: If ``ndarray`` is not an ``np.ndarray``.
            KeyError: If ``ndarray.dtype`` has no entry in
                ``NUMPY_TYPE_TO_DATA_TYPE``.
        """
        assert isinstance(ndarray, np.ndarray)
        self.ndarray = ndarray
        self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype]
        self.i2_scale = i2_scale

    def astype(self, data_type: DataType) -> UnquantizedTensor:
        """Return a new tensor converted to ``data_type``.

        The conversion is carried out on local variables, so this tensor's
        ``ndarray``, ``i2_scale`` and ``data_type`` attributes are not
        reassigned and stay consistent with each other; calling ``astype``
        repeatedly on the same tensor therefore always starts from the same
        stored data.

        Args:
            data_type: Target data type; its ``dtype`` attribute is the NumPy
                dtype passed to ``ndarray.astype``.

        Returns:
            A new ``UnquantizedTensor`` holding the converted array and the
            applicable ``i2_scale``.
        """
        dtype = data_type.dtype
        ndarray = self.ndarray
        i2_scale = self.i2_scale
        if self.data_type == DT_BF16:
            # BF16 payloads go through bf16_to_fp32 before any further cast.
            ndarray = bf16_to_fp32(ndarray)
        if dtype == np.uint8:
            # A uint8 target routes the data through transform_to_i2, which
            # returns the transformed array together with its scale.
            ndarray, i2_scale = transform_to_i2(ndarray)
        return UnquantizedTensor(ndarray.astype(dtype), i2_scale)

    def to_ggml(self) -> Self:
        """Return this tensor unchanged."""
        return self

    def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor:
        """Return the ``n_part``-th third of the rows, run through ``permute``.

        The first dimension is split into three equal slices of
        ``shape[0] // 3`` rows. The result is built without an ``i2_scale``.
        """
        r = self.ndarray.shape[0] // 3
        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head, n_head_kv))

    def part(self, n_part: int) -> UnquantizedTensor:
        """Return the ``n_part``-th third of the rows as a new tensor.

        The first dimension is split into three equal slices of
        ``shape[0] // 3`` rows. The result is built without an ``i2_scale``.
        """
        r = self.ndarray.shape[0] // 3
        return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])

    def permute(self, n_head: int, n_head_kv: int) -> UnquantizedTensor:
        """Return a new tensor holding ``permute(self.ndarray, n_head, n_head_kv)``.

        The result is built without an ``i2_scale``.
        """
        return UnquantizedTensor(permute(self.ndarray, n_head, n_head_kv))
809
810
811	def load_unquantized(lazy_tensor: LazyTensor, expected_dtype: Any = None, convert: bool = False) -> NDArray:

astypeMethod · 0.70

permute_partMethod · 0.70

partMethod · 0.70

permuteMethod · 0.70

loadFunction · 0.70

no outgoing calls

no test coverage detected