| 206 | |
| 207 | @dataclass |
| 208 | class Params: |
| 209 | n_vocab: int |
| 210 | n_embd: int |
| 211 | n_layer: int |
| 212 | n_ctx: int |
| 213 | n_ff: int |
| 214 | n_head: int |
| 215 | n_head_kv: int |
| 216 | n_experts: int | None = None |
| 217 | n_experts_used: int | None = None |
| 218 | f_norm_eps: float | None = None |
| 219 | |
| 220 | rope_scaling_type: gguf.RopeScalingType | None = None |
| 221 | f_rope_freq_base: float | None = None |
| 222 | f_rope_scale: float | None = None |
| 223 | n_orig_ctx: int | None = None |
| 224 | rope_finetuned: bool | None = None |
| 225 | |
| 226 | ftype: GGMLFileType | None = None |
| 227 | |
| 228 | # path to the directory containing the model files |
| 229 | path_model: Path | None = None |
| 230 | |
| 231 | @staticmethod |
| 232 | def guessed(model: LazyModel) -> Params: |
| 233 | # embedding shape: try transformer naming ("model.embed_tokens.weight") first, else fall back to "tok_embeddings.weight" |
| 234 | n_vocab, n_embd = model["model.embed_tokens.weight"].shape if "model.embed_tokens.weight" in model else model["tok_embeddings.weight"].shape |
| 235 | |
| 236 | # layer count: try transformer naming first, then baichuan naming, else the "layers.{i}.attention.wq.weight" naming |
| 237 | if "model.layers.0.self_attn.q_proj.weight" in model: |
| 238 | n_layer = next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model) |
| 239 | elif "model.layers.0.self_attn.W_pack.weight" in model: # next: try baichuan naming |
| 240 | n_layer = next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model) |
| 241 | else: |
| 242 | n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model) |
| 243 | |
| 244 | if n_layer < 1: |
| 245 | msg = """\ |
| 246 | failed to guess 'n_layer'. This model is unknown or unsupported. |
| 247 | Suggestion: provide 'config.json' of the model in the same directory containing model files.""" |
| 248 | raise KeyError(textwrap.dedent(msg)) |
| 249 | |
| 250 | n_head = n_embd // 128 # guessed |
| 251 | n_mult = 256 # guessed |
| 252 | |
| 253 | # TODO: verify this n_ff guess (2/3 of 4 * n_embd, rounded up to a multiple of n_mult) |
| 254 | n_ff = int(2 * (4 * n_embd) / 3) |
| 255 | n_ff = n_mult * ((n_ff + n_mult - 1) // n_mult) |
| 256 | |
| 257 | return Params( |
| 258 | n_vocab = n_vocab, |
| 259 | n_embd = n_embd, |
| 260 | n_layer = n_layer, |
| 261 | n_ctx = -1, |
| 262 | n_ff = n_ff, |
| 263 | n_head = n_head, |
| 264 | n_head_kv = n_head, |
| 265 | f_norm_eps = 1e-5, |
no outgoing calls
no test coverage detected