MCPcopy Index your code
hub / github.com/microsoft/BitNet / set_gguf_parameters

Method set_gguf_parameters

utils/generate-dummy-bitnet-model.py:188–229  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

186 return new_name
187
def set_gguf_parameters(self):
    """Copy the model's hyperparameters into the GGUF writer.

    The model name, block count, embedding length, head count and file
    type are written unconditionally.  Every other field is optional: it
    is written (and logged) only when its lookup returns something other
    than ``None``.

    NOTE(review): ``find_hparam`` is defined elsewhere; it presumably
    raises when none of the candidate keys exist and ``optional`` is not
    set -- confirm against its definition.
    """
    writer = self.gguf_writer

    # Identification and overall depth of the model.
    writer.add_name(self.dir_model.name)
    writer.add_block_count(self.block_count)

    # Context window (optional; looked up under either key name).
    n_ctx = self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)
    if n_ctx is not None:
        writer.add_context_length(n_ctx)
        logger.info(f"gguf: context length = {n_ctx}")

    # Hidden size (non-optional lookup).
    n_embd = self.find_hparam(["hidden_size", "n_embd"])
    writer.add_embedding_length(n_embd)
    logger.info(f"gguf: embedding length = {n_embd}")

    # Feed-forward width (optional).
    n_ff = self.find_hparam(["intermediate_size", "n_inner"], optional=True)
    if n_ff is not None:
        writer.add_feed_forward_length(n_ff)
        logger.info(f"gguf: feed forward length = {n_ff}")

    # Attention head count (non-optional lookup).
    n_head = self.find_hparam(["num_attention_heads", "n_head"])
    writer.add_head_count(n_head)
    logger.info(f"gguf: head count = {n_head}")

    # The remaining fields are read straight from the hparams mapping and
    # are skipped when the key is missing or maps to None.
    hparams = self.hparams

    n_head_kv = hparams.get("num_key_value_heads")
    if n_head_kv is not None:
        writer.add_head_count_kv(n_head_kv)
        logger.info(f"gguf: key-value head count = {n_head_kv}")

    rope_theta = hparams.get("rope_theta")
    if rope_theta is not None:
        writer.add_rope_freq_base(rope_theta)
        logger.info(f"gguf: rope theta = {rope_theta}")

    f_rms_eps = hparams.get("rms_norm_eps")
    if f_rms_eps is not None:
        writer.add_layer_norm_rms_eps(f_rms_eps)
        logger.info(f"gguf: rms norm epsilon = {f_rms_eps}")

    # Layer-norm epsilon is looked up under several alternative key names.
    f_norm_eps = self.find_hparam(
        ["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"],
        optional=True,
    )
    if f_norm_eps is not None:
        writer.add_layer_norm_eps(f_norm_eps)
        logger.info(f"gguf: layer norm epsilon = {f_norm_eps}")

    n_experts = hparams.get("num_local_experts")
    if n_experts is not None:
        writer.add_expert_count(n_experts)
        logger.info(f"gguf: expert count = {n_experts}")

    n_experts_used = hparams.get("num_experts_per_tok")
    if n_experts_used is not None:
        writer.add_expert_used_count(n_experts_used)
        logger.info(f"gguf: experts used count = {n_experts_used}")

    # File type is always recorded last.
    writer.add_file_type(self.ftype)
    logger.info(f"gguf: file type = {self.ftype}")
230
231 def write_tensors(self):
232 block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))

Callers 2

set_gguf_parameters (Method) · 0.45
main (Function) · 0.45

Calls 1

find_hparam (Method) · 0.95

Tested by

no test coverage detected