(self, ftype: GGMLFileType, model: LazyModel, concurrency: int)
| 1176 | self.gguf.write_ti_data_to_file() |
| 1177 | |
def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency: int) -> None:
    """Convert every tensor in *model* and write its data through ``self.gguf``.

    Tensors are first passed through ``OutputFile.do_item`` using
    ``bounded_parallel_map``; the results are then fed to
    ``OutputFile.maybe_do_quantize``. For ``GGMLFileType.MostlyQ8_0`` that
    second stage also goes through ``bounded_parallel_map`` (with
    ``use_processpool_executor=True``); for every other file type it is a
    plain lazy ``map``.

    Each converted item is unpacked as an ``(ndarray, i2_scale)`` pair. When
    ``i2_scale`` is not ``None`` the array is run through
    ``preprocess_weights`` and written, followed by the scale tiled 8 times;
    otherwise only the array is written. One progress line is logged per
    tensor.

    Args:
        ftype: Target file type; selects how the quantize stage is executed.
        model: Mapping of tensor name to lazy tensor; iterated in order.
        concurrency: Parallelism forwarded to ``bounded_parallel_map``.
    """
    ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency=concurrency)
    if ftype == GGMLFileType.MostlyQ8_0:
        ndarrays = bounded_parallel_map(
            OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency,
            use_processpool_executor=True,
        )
    else:
        ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)

    # Loop invariants: tensor count and the width needed to right-align the counter.
    total = len(model)
    padi = len(str(total))

    # Monotonic clock so the reported elapsed time is immune to wall-clock adjustments.
    start = time.monotonic()
    for i, ((name, lazy_tensor), (ndarray, i2_scale)) in enumerate(zip(model.items(), ndarrays)):
        elapsed = time.monotonic() - start
        size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
        logger.info(
            f"[{i + 1:{padi}d}/{total}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
        )

        if i2_scale is None:
            self.gguf.write_tensor_data(ndarray)
            continue

        # NOTE(review): i2_scale presumably accompanies I2-quantized weights — confirm
        # against maybe_do_quantize. The write order (weights, then scale) is preserved.
        i2_scale = np.tile(i2_scale, 8)
        ndarray = preprocess_weights(ndarray)
        self.gguf.write_tensor_data(ndarray)
        self.gguf.write_tensor_data(i2_scale)
| 1209 | |
def close(self) -> None:
    """Close the underlying ``self.gguf`` writer."""
    writer = self.gguf
    writer.close()
no test coverage detected