hub / github.com/microsoft/BitNet / write_tensor_data

Method write_tensor_data

utils/convert.py:1178–1208 · view source on GitHub ↗

(self, ftype: GGMLFileType, model: LazyModel, concurrency: int)

Source from the content-addressed store, hash-verified

1176	self.gguf.write_ti_data_to_file()
1177
def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency: int) -> None:
    """Convert every tensor in *model* and append its data to the GGUF output.

    Each item of ``model.items()`` is passed through ``OutputFile.do_item``
    (via ``bounded_parallel_map``) and then through
    ``OutputFile.maybe_do_quantize``.  Every result is unpacked as an
    ``(ndarray, i2_scale)`` pair and handed to ``self.gguf.write_tensor_data``.

    Args:
        ftype: Target file type.  ``GGMLFileType.MostlyQ8_0`` runs the
            quantize step through ``bounded_parallel_map`` with
            ``use_processpool_executor=True``; every other value uses the
            builtin lazy ``map``.
        model: Mapping of tensor name to lazy tensor.  It is iterated twice
            (once to feed the pipeline, once for logging), so it must yield
            its items in the same order both times.
        concurrency: Forwarded to ``bounded_parallel_map`` as both
            ``concurrency`` and, for the Q8_0 path, ``max_workers``.
    """
    ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency=concurrency)
    if ftype == GGMLFileType.MostlyQ8_0:
        ndarrays = bounded_parallel_map(
            OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency,
            use_processpool_executor=True,
        )
    else:
        # NOTE: there is no dedicated branch for the I2 file type; like every
        # other non-Q8_0 type it goes through maybe_do_quantize sequentially.
        ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)

    # Loop invariants: total tensor count and the width needed to print it.
    total = len(model)
    padi = len(str(total))

    start = time.time()
    for i, ((name, lazy_tensor), (ndarray, i2_scale)) in enumerate(zip(model.items(), ndarrays)):
        elapsed = time.time() - start
        size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
        logger.info(
            f"[{i + 1:{padi}d}/{total}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
        )

        if i2_scale is None:
            self.gguf.write_tensor_data(ndarray)
            continue

        # A scale accompanies this tensor (presumably the I2 path -- confirm
        # against maybe_do_quantize): write the preprocessed weights first,
        # then the scale repeated 8 times.
        i2_scale = np.tile(i2_scale, 8)
        ndarray = preprocess_weights(ndarray)
        self.gguf.write_tensor_data(ndarray)
        self.gguf.write_tensor_data(i2_scale)
1209
def close(self) -> None:
    """Close the GGUF writer held in ``self.gguf``."""
    writer = self.gguf
    writer.close()

Callers 1

write_all · Method · 0.95

Calls 2

bounded_parallel_map · Function · 0.70

preprocess_weights · Function · 0.70

Tested by

no test coverage detected