MCPcopy Index your code
hub / github.com/microsoft/BitNet / Params

Class Params

utils/convert.py:208–387  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

206
207@dataclass
208class Params:
209 n_vocab: int
210 n_embd: int
211 n_layer: int
212 n_ctx: int
213 n_ff: int
214 n_head: int
215 n_head_kv: int
216 n_experts: int | None = None
217 n_experts_used: int | None = None
218 f_norm_eps: float | None = None
219
220 rope_scaling_type: gguf.RopeScalingType | None = None
221 f_rope_freq_base: float | None = None
222 f_rope_scale: float | None = None
223 n_orig_ctx: int | None = None
224 rope_finetuned: bool | None = None
225
226 ftype: GGMLFileType | None = None
227
228 # path to the directory containing the model files
229 path_model: Path | None = None
230
231 @staticmethod
232 def guessed(model: LazyModel) -> Params:
233 # try transformer naming first
234 n_vocab, n_embd = model["model.embed_tokens.weight"].shape if "model.embed_tokens.weight" in model else model["tok_embeddings.weight"].shape
235
236 # try transformer naming first
237 if "model.layers.0.self_attn.q_proj.weight" in model:
238 n_layer = next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
239 elif "model.layers.0.self_attn.W_pack.weight" in model: # next: try baichuan naming
240 n_layer = next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
241 else:
242 n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
243
244 if n_layer < 1:
245 msg = """\
246 failed to guess 'n_layer'. This model is unknown or unsupported.
247 Suggestion: provide 'config.json' of the model in the same directory containing model files."""
248 raise KeyError(textwrap.dedent(msg))
249
250 n_head = n_embd // 128 # guessed
251 n_mult = 256 # guessed
252
253 # TODO: verify this
254 n_ff = int(2 * (4 * n_embd) / 3)
255 n_ff = n_mult * ((n_ff + n_mult - 1) // n_mult)
256
257 return Params(
258 n_vocab = n_vocab,
259 n_embd = n_embd,
260 n_layer = n_layer,
261 n_ctx = -1,
262 n_ff = n_ff,
263 n_head = n_head,
264 n_head_kv = n_head,
265 f_norm_eps = 1e-5,

Callers 3

guessed · Method · 0.70
loadHFTransformerJson · Method · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected