| 267 | |
@staticmethod
def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
    """Build a ``Params`` object from a Hugging Face style ``config.json``.

    Args:
        model: Not read by this loader.
            NOTE(review): presumably kept so the signature matches sibling
            loaders -- confirm against the callers.
        config_path: Path of the JSON configuration file to parse.

    Returns:
        A ``Params`` instance filled from the configuration. Optional
        settings (RoPE scaling, expert counts, ``rope_theta``) are ``None``
        when the configuration does not provide them.

    Raises:
        NotImplementedError: If ``rope_scaling`` names a type other than
            ``"linear"`` or ``"yarn"``.
        KeyError: If neither ``max_sequence_length`` nor
            ``max_position_embeddings`` is present, or if any other
            required key is missing from the configuration.
    """
    # JSON text is UTF-8 by specification, so decode it explicitly instead of
    # relying on the platform's locale encoding (which is not UTF-8 everywhere).
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    # RoPE scaling is optional; every related value stays None unless the
    # configuration carries a "rope_scaling" entry with a non-empty "type".
    rope_scaling_type = None
    f_rope_scale = None
    n_orig_ctx = None
    rope_finetuned = None

    rope_scaling = config.get("rope_scaling")
    if rope_scaling is not None and (typ := rope_scaling.get("type")):
        # The factor is recorded before the type is validated; an unknown
        # type raises below, so it never reaches the returned Params.
        f_rope_scale = rope_scaling.get("factor")
        if typ == "linear":
            rope_scaling_type = gguf.RopeScalingType.LINEAR
        elif typ == "yarn":
            rope_scaling_type = gguf.RopeScalingType.YARN
            # Both keys are mandatory for YaRN: a missing one is a KeyError.
            n_orig_ctx = rope_scaling['original_max_position_embeddings']
            rope_finetuned = rope_scaling['finetuned']
        else:
            raise NotImplementedError(f'Unknown rope scaling type: {typ}')

    # Context length: "max_sequence_length" takes precedence over
    # "max_position_embeddings" when both are present.
    if "max_sequence_length" in config:
        n_ctx = config["max_sequence_length"]
    elif "max_position_embeddings" in config:
        n_ctx = config["max_position_embeddings"]
    else:
        msg = """\
            failed to guess 'n_ctx'. This model is unknown or unsupported.
            Suggestion: provide 'config.json' of the model in the same directory containing model files."""
        raise KeyError(textwrap.dedent(msg))

    # Expert counts are only read when "num_local_experts" is present; in
    # that case "num_experts_per_tok" is required as well.
    n_experts = None
    n_experts_used = None
    if "num_local_experts" in config:
        n_experts = config["num_local_experts"]
        n_experts_used = config["num_experts_per_tok"]

    return Params(
        n_vocab           = config["vocab_size"],
        n_embd            = config["hidden_size"],
        n_layer           = config["num_hidden_layers"],
        n_ctx             = n_ctx,
        n_ff              = config["intermediate_size"],
        # Bind n_head so the next argument can default the KV head count to it.
        n_head            = (n_head := config["num_attention_heads"]),
        n_head_kv         = config.get("num_key_value_heads", n_head),
        n_experts         = n_experts,
        n_experts_used    = n_experts_used,
        f_norm_eps        = config["rms_norm_eps"],
        f_rope_freq_base  = config.get("rope_theta"),
        rope_scaling_type = rope_scaling_type,
        f_rope_scale      = f_rope_scale,
        n_orig_ctx        = n_orig_ctx,
        rope_finetuned    = rope_finetuned,
    )
| 322 | |
| 323 | # LLaMA v2 70B params.json |
| 324 | # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1} |