MCPcopy Index your code
hub / github.com/microsoft/BitNet / write_tensor_data

Method write_tensor_data

utils/convert.py:1178–1208  ·  view source on GitHub ↗
(self, ftype: GGMLFileType, model: LazyModel, concurrency: int)

Source from the content-addressed store, hash-verified

1176 self.gguf.write_ti_data_to_file()
1177
def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency: int) -> None:
    """Convert every tensor in *model* and append its data to the GGUF output.

    Each ``(name, lazy_tensor)`` item is first passed through
    ``OutputFile.do_item`` via ``bounded_parallel_map`` and then through
    ``OutputFile.maybe_do_quantize``.  Every result is unpacked as an
    ``(array, i2_scale)`` pair and written in the iteration order of
    ``model.items()``, with one progress line logged per tensor.

    Args:
        ftype: Target file type.  Only ``GGMLFileType.MostlyQ8_0`` routes the
            quantize step through ``bounded_parallel_map``; every other value
            uses the built-in ``map``.
        model: Container of tensors; must support ``items()`` and ``len()``.
        concurrency: Forwarded to ``bounded_parallel_map`` as ``concurrency``
            (and additionally as ``max_workers`` on the Q8_0 path).
    """
    loaded = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency=concurrency)

    if ftype == GGMLFileType.MostlyQ8_0:
        # Q8_0 runs the quantize step through bounded_parallel_map with
        # use_processpool_executor=True.
        converted = bounded_parallel_map(
            OutputFile.maybe_do_quantize,
            loaded,
            concurrency=concurrency,
            max_workers=concurrency,
            use_processpool_executor=True,
        )
    else:
        # Built-in map is lazy: each tensor is handled only when the write
        # loop below pulls the next item.
        converted = map(OutputFile.maybe_do_quantize, loaded)

    # The tensor count does not change while writing, so derive the counter
    # width once instead of on every iteration.
    total = len(model)
    index_width = len(str(total))

    started = time.time()
    for position, ((name, lazy_tensor), result) in enumerate(zip(model.items(), converted), start=1):
        data, i2_scale = result
        elapsed = time.time() - started
        dims = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
        logger.info(
            f"[{position:{index_width}d}/{total}] Writing tensor {name:38s} | size {dims:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
        )

        if i2_scale is None:
            # No scale accompanies this tensor: write the array as-is.
            self.gguf.write_tensor_data(data)
            continue

        # A scale was returned: repeat it 8 times, run the weights through
        # preprocess_weights, then write the weights followed by the scale.
        tiled_scale = np.tile(i2_scale, 8)
        packed = preprocess_weights(data)
        self.gguf.write_tensor_data(packed)
        self.gguf.write_tensor_data(tiled_scale)
1209
def close(self) -> None:
    """Close the GGUF writer held in ``self.gguf``."""
    writer = self.gguf
    writer.close()

Callers 1

write_all · Method · 0.95

Calls 2

bounded_parallel_map · Function · 0.70
preprocess_weights · Function · 0.70

Tested by

no test coverage detected