(self)
| 229 | logger.info(f"gguf: file type = {self.ftype}") |
| 230 | |
def write_tensors(self):
    """Convert each tensor from ``self.get_tensors()`` and add it to the GGUF writer.

    For every ``(name, tensor)`` pair the method:

    * skips names ending in one of a few attention-buffer suffixes;
    * casts any torch dtype other than float16/float32 to float32;
    * squeezes the tensor and converts it to a NumPy array;
    * maps the source name to its GGUF name through the tensor name map;
    * adjusts the array precision according to ``self.ftype``
      (``0`` and ``1`` are the only values acted on here);
    * logs the result and passes it to ``self.gguf_writer.add_tensor``.

    Raises:
        ValueError: if the tensor name map returns ``None`` for a tensor name.
    """
    hparams = self.hparams

    # The layer count can be stored under several keys. Resolve them from the
    # lowest to the highest priority so that "n_layers" wins, then
    # "num_hidden_layers", then "n_layer" (None when none of them is present).
    layer_count = hparams.get("n_layer")
    layer_count = hparams.get("num_hidden_layers", layer_count)
    layer_count = hparams.get("n_layers", layer_count)
    name_map = gguf.get_tensor_name_map(self.model_arch, layer_count)

    # Tensors with these name suffixes are not written to the output.
    ignored_suffixes = (
        ".attention.masked_bias",
        ".attention.bias",
        ".attention.rotary_emb.inv_freq",
    )

    for src_name, tensor in self.get_tensors():
        if src_name.endswith(ignored_suffixes):
            continue

        src_dtype = tensor.dtype

        # Anything that is not already float16/float32 is widened to float32.
        if src_dtype not in (torch.float16, torch.float32):
            tensor = tensor.to(torch.float32)

        array = tensor.squeeze().numpy()

        # Translate the source tensor name into its GGUF counterpart.
        dst_name = name_map.get_name(src_name, try_suffixes=(".weight", ".bias"))
        if dst_name is None:
            raise ValueError(f"Can not map tensor {src_name!r}")

        rank = array.ndim
        np_dtype = array.dtype
        ftype = self.ftype

        if ftype == 0:
            # f32 output requested: widen any float16 data.
            if np_dtype == np.float16:
                array = array.astype(np.float32)
        elif ftype == 1:
            if np_dtype == np.float16:
                # TODO: Why can't we use these float16 tensors as-is? There
                # should be no reason to store float16 as float32.
                if rank == 1 or dst_name.endswith("_norm.weight"):
                    array = array.astype(np.float32)
            elif np_dtype == np.float32:
                # f16 output requested: narrow float32 2-dim weight tensors.
                if src_name.endswith(".weight") and rank == 2:
                    array = array.astype(np.float16)

        logger.info(f"{dst_name}, n_dims = {rank}, {src_dtype} --> {array.dtype}")

        self.gguf_writer.add_tensor(dst_name, array)
| 270 | |
| 271 | def write(self): |
| 272 | self.write_tensors() |
no test coverage detected