hub / github.com/microsoft/BitNet / write_tensors

Method write_tensors

utils/generate-dummy-bitnet-model.py:861–954 · view source on GitHub ↗

(self)

Source from the content-addressed store, hash-verified

859	return [(self.map_tensor_name(name), data_torch)]
860
861	def write_tensors(self):
862	max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
863
864	for name, data_torch in self.generate_tensors():
865	# we don't need these
866	if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
867	continue
868
869	old_dtype = data_torch.dtype
870
871	# convert any unsupported data types to float32
872	if data_torch.dtype not in (torch.float16, torch.float32):
873	data_torch = data_torch.to(torch.float32)
874
875	# use the first number-like part of the tensor name as the block id
876	bid = None
877	for part in name.split("."):
878	if part.isdecimal():
879	bid = int(part)
880	break
881
882	for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
883	data: np.ndarray = data # type hint
884	data_shape = data.shape
885	n_dims = len(data.shape)
886	data_dtype = data.dtype
887	data_qtype: gguf.GGMLQuantizationType | None = None
888
889	# when both are True, f32 should win
890	# extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
891	# extra_f16 = self.extra_f16_tensors(name, new_name, bid, n_dims)
892	extra_f32 = False
893	extra_f16 = False
894
895	# Most of the codebase that takes in 1D tensors or norms only handles F32 tensors
896	# Conditions should closely match those in llama_model_quantize_internal in llama.cpp
897	extra_f32 = any(cond for cond in (
898	extra_f32,
899	n_dims == 1,
900	new_name.endswith("_norm.weight"),
901	))
902
903	# Some tensor types are always in float32
904	extra_f32 = extra_f32 or any(self.match_model_tensor_name(new_name, key, bid) for key in (
905	gguf.MODEL_TENSOR.FFN_GATE_INP,
906	gguf.MODEL_TENSOR.POS_EMBD,
907	gguf.MODEL_TENSOR.TOKEN_TYPES,
908	# for debugging only; delete for inference
909	gguf.MODEL_TENSOR.TOKEN_EMBD,
910	))
911
912	# if f16 desired, convert any float32 2-dim weight tensors to float16
913	extra_f16 = any(cond for cond in (
914	extra_f16,
915	(name.endswith(".weight") and n_dims >= 2),
916	))
917
918	suit_i2 = True

Callers

nothing calls this directly

Calls 6

generate_tensors — Method · 0.95

modify_tensors — Method · 0.95

transform_to_tl1 — Method · 0.95

transform_to_tl2 — Method · 0.95

match_model_tensor_name — Method · 0.45

astype — Method · 0.45

Tested by

no test coverage detected