(self)
| 859 | return [(self.map_tensor_name(name), data_torch)] |
| 860 | |
| 861 | def write_tensors(self): |
| 862 | max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,") |
| 863 | |
| 864 | for name, data_torch in self.generate_tensors(): |
| 865 | # we don't need these |
| 866 | if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): |
| 867 | continue |
| 868 | |
| 869 | old_dtype = data_torch.dtype |
| 870 | |
| 871 | # convert any unsupported data types to float32 |
| 872 | if data_torch.dtype not in (torch.float16, torch.float32): |
| 873 | data_torch = data_torch.to(torch.float32) |
| 874 | |
| 875 | # use the first number-like part of the tensor name as the block id |
| 876 | bid = None |
| 877 | for part in name.split("."): |
| 878 | if part.isdecimal(): |
| 879 | bid = int(part) |
| 880 | break |
| 881 | |
| 882 | for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)): |
| 883 | data: np.ndarray = data # type hint |
| 884 | data_shape = data.shape |
| 885 | n_dims = len(data.shape) |
| 886 | data_dtype = data.dtype |
| 887 | data_qtype: gguf.GGMLQuantizationType | None = None |
| 888 | |
| 889 | # when both are True, f32 should win |
| 890 | # extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims) |
| 891 | # extra_f16 = self.extra_f16_tensors(name, new_name, bid, n_dims) |
| 892 | extra_f32 = False |
| 893 | extra_f16 = False |
| 894 | |
| 895 | # Most of the codebase that takes in 1D tensors or norms only handles F32 tensors |
| 896 | # Conditions should closely match those in llama_model_quantize_internal in llama.cpp |
| 897 | extra_f32 = any(cond for cond in ( |
| 898 | extra_f32, |
| 899 | n_dims == 1, |
| 900 | new_name.endswith("_norm.weight"), |
| 901 | )) |
| 902 | |
| 903 | # Some tensor types are always in float32 |
| 904 | extra_f32 = extra_f32 or any(self.match_model_tensor_name(new_name, key, bid) for key in ( |
| 905 | gguf.MODEL_TENSOR.FFN_GATE_INP, |
| 906 | gguf.MODEL_TENSOR.POS_EMBD, |
| 907 | gguf.MODEL_TENSOR.TOKEN_TYPES, |
| 908 | # for debug / delete when inference |
| 909 | gguf.MODEL_TENSOR.TOKEN_EMBD, |
| 910 | )) |
| 911 | |
| 912 | # if f16 desired, convert any float32 2-dim weight tensors to float16 |
| 913 | extra_f16 = any(cond for cond in ( |
| 914 | extra_f16, |
| 915 | (name.endswith(".weight") and n_dims >= 2), |
| 916 | )) |
| 917 | |
| 918 | suit_i2 = True |
nothing calls this directly
no test coverage detected