| 779 | return ans, scale |
| 780 | |
class UnquantizedTensor(Tensor):
    """Tensor whose data is held directly in a NumPy array.

    Instance attributes (all assigned in ``__init__``):
      ndarray   -- the wrapped NumPy array.
      data_type -- entry of ``NUMPY_TYPE_TO_DATA_TYPE`` for ``ndarray.dtype``.
      i2_scale  -- second value returned by ``transform_to_i2`` when the
                   tensor was produced by an ``np.uint8`` conversion in
                   ``astype``; otherwise whatever the creator passed
                   (``None`` by default).
    """

    def __init__(self, ndarray: NDArray, i2_scale: NDArray | None = None):
        """Wrap *ndarray*, optionally remembering an accompanying *i2_scale*.

        Raises ``KeyError`` if the array's dtype has no entry in
        ``NUMPY_TYPE_TO_DATA_TYPE``.
        """
        assert isinstance(ndarray, np.ndarray)
        self.ndarray = ndarray
        self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype]
        self.i2_scale = i2_scale

    def astype(self, data_type: DataType) -> UnquantizedTensor:
        """Return a new tensor converted to *data_type*; ``self`` is unchanged.

        BF16 input is first widened with ``bf16_to_fp32``.  When the target
        NumPy dtype is ``np.uint8`` the values go through
        ``transform_to_i2`` and the scale it returns is attached to the
        result; for any other target the current ``i2_scale`` is carried
        over as-is.
        """
        dtype = data_type.dtype
        # Work on locals rather than overwriting self.ndarray / self.i2_scale:
        # self.data_type is fixed at construction, so mutating the array here
        # would leave the object labelled BF16 while holding converted data,
        # and a second astype() would run bf16_to_fp32 on it again.
        values = self.ndarray
        scale = self.i2_scale
        if self.data_type == DT_BF16:
            values = bf16_to_fp32(values)
        if dtype == np.uint8:
            values, scale = transform_to_i2(values)
        return UnquantizedTensor(values.astype(dtype), scale)

    def to_ggml(self) -> Self:
        """Return this tensor itself; no conversion is performed."""
        return self

    def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor:
        """Return ``permute`` applied to the *n_part*-th third of the rows.

        The first axis is divided into three equal chunks of
        ``shape[0] // 3`` rows and chunk *n_part* is selected.
        NOTE(review): ``i2_scale`` is not forwarded to the result.
        """
        r = self.ndarray.shape[0] // 3
        chunk = self.ndarray[r * n_part : r * n_part + r, ...]
        return UnquantizedTensor(permute(chunk, n_head, n_head_kv))

    def part(self, n_part: int) -> UnquantizedTensor:
        """Return the *n_part*-th third of the rows (``shape[0] // 3`` each).

        NOTE(review): ``i2_scale`` is not forwarded to the result.
        """
        r = self.ndarray.shape[0] // 3
        return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])

    def permute(self, n_head: int, n_head_kv: int) -> UnquantizedTensor:
        """Return a new tensor holding ``permute(ndarray, n_head, n_head_kv)``.

        NOTE(review): ``i2_scale`` is not forwarded to the result.
        """
        return UnquantizedTensor(permute(self.ndarray, n_head, n_head_kv))
| 809 | |
| 810 | |
| 811 | def load_unquantized(lazy_tensor: LazyTensor, expected_dtype: Any = None, convert: bool = False) -> NDArray: |
no outgoing calls
no test coverage detected