MCPcopy Index your code
hub / github.com/microsoft/BitNet / UnquantizedTensor

Class UnquantizedTensor

utils/convert-ms-to-gguf-bitnet.py:781–808  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

779 return ans, scale
780
class UnquantizedTensor(Tensor):
    """Tensor whose payload is held directly in a NumPy array.

    Attributes are plain instance fields:
      * ``ndarray``   - the wrapped NumPy array.
      * ``data_type`` - entry of ``NUMPY_TYPE_TO_DATA_TYPE`` for the array's dtype.
      * ``i2_scale``  - optional scale value carried alongside the array; it is
        only produced here by ``astype`` when the target dtype is ``np.uint8``.
    """

    def __init__(self, ndarray: NDArray, i2_scale: NDArray | None = None):
        """Wrap *ndarray*, optionally attaching a previously computed *i2_scale*."""
        assert isinstance(ndarray, np.ndarray)
        self.ndarray = ndarray
        self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype]
        self.i2_scale = i2_scale

    def astype(self, data_type: DataType) -> UnquantizedTensor:
        """Return a new tensor converted to ``data_type.dtype``.

        A ``DT_BF16`` source is first passed through ``bf16_to_fp32``. When the
        target dtype is ``np.uint8`` the array is passed through
        ``transform_to_i2``, whose second return value becomes the new
        tensor's ``i2_scale``; otherwise the current ``i2_scale`` is carried
        over unchanged.

        The receiver is left untouched: the conversion works on local
        references so that ``self.ndarray`` stays consistent with
        ``self.data_type`` and ``astype`` can safely be called again on the
        same instance.
        """
        dtype = data_type.dtype
        ndarray = self.ndarray
        i2_scale = self.i2_scale
        if self.data_type == DT_BF16:
            ndarray = bf16_to_fp32(ndarray)
        if dtype == np.uint8:
            ndarray, i2_scale = transform_to_i2(ndarray)
        return UnquantizedTensor(ndarray.astype(dtype), i2_scale)

    def to_ggml(self) -> Self:
        """Return this tensor itself; no conversion is performed here."""
        return self

    def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor:
        """Return the permuted *n_part*-th third of the array along axis 0.

        The first axis is split into three equal-sized slices (any remainder
        rows from the integer division are ignored); the selected slice is
        passed to the module-level ``permute`` helper. The result does not
        carry over ``i2_scale``.
        """
        r = self.ndarray.shape[0] // 3
        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head, n_head_kv))

    def part(self, n_part: int) -> UnquantizedTensor:
        """Return the *n_part*-th third of the array along axis 0.

        Uses the same three-way split as ``permute_part`` but without
        permuting. The result does not carry over ``i2_scale``.
        """
        r = self.ndarray.shape[0] // 3
        return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])

    def permute(self, n_head: int, n_head_kv: int) -> UnquantizedTensor:
        """Return a new tensor holding ``permute(self.ndarray, n_head, n_head_kv)``.

        The call inside the body resolves to the module-level ``permute``
        function, not to this method. The result does not carry over
        ``i2_scale``.
        """
        return UnquantizedTensor(permute(self.ndarray, n_head, n_head_kv))
809
810
811def load_unquantized(lazy_tensor: LazyTensor, expected_dtype: Any = None, convert: bool = False) -> NDArray:

Callers 5

astypeMethod · 0.70
permute_partMethod · 0.70
partMethod · 0.70
permuteMethod · 0.70
loadFunction · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected