Extract parameter count from model data. Resolution order: 1. authoritative model-card overrides for known mixed-precision MoEs 2. safetensors metadata (most reliable when present) 3. gguf metadata 4. config (estimated from hidden_size + num_layers + vocab_size) 5.
(model_data: dict)
| 479 | |
| 480 | |
| 481 | def _extract_param_count(model_data: dict) -> int: |
| 482 | """Extract parameter count from model data. |
| 483 | |
| 484 | Resolution order: |
| 485 | 1. authoritative model-card overrides for known mixed-precision MoEs |
| 486 | 2. safetensors metadata (most reliable when present) |
| 487 | 3. gguf metadata |
| 488 | 4. config (estimated from hidden_size + num_layers + vocab_size) |
| 489 | 5. ``_KNOWN_PARAM_COUNTS`` lookup (curated; for models like ``microsoft/phi-4`` |
| 490 | that have neither indexed metadata nor a size in the repo name) |
| 491 | 6. name-based size hint (e.g. ``Qwen/Qwen3-32B`` → 32B) |
| 492 | |
| 493 | Returns 0 if none of the above succeed (caller drops the model). |
| 494 | """ |
| 495 | model_id = model_data.get("id", "") or "" |
| 496 | authoritative = _lookup_curated_count(_AUTHORITATIVE_PARAM_COUNTS, model_id) |
| 497 | if authoritative and authoritative > 0: |
| 498 | return authoritative |
| 499 | |
| 500 | # Try safetensors metadata (checked right after the authoritative overrides) |
| 501 | safetensors = model_data.get("safetensors") |
| 502 | if safetensors and isinstance(safetensors, dict): |
| 503 | params = safetensors.get("total") |
| 504 | if params: |
| 505 | return int(params) |
| 506 | parameters = safetensors.get("parameters") |
| 507 | if isinstance(parameters, dict): |
| 508 | total = sum(parameters.values()) |
| 509 | if total > 0: |
| 510 | return total |
| 511 | |
| 512 | # Try gguf metadata |
| 513 | gguf_meta = model_data.get("gguf", {}) or {} |
| 514 | if isinstance(gguf_meta, dict): |
| 515 | total = gguf_meta.get("total") |
| 516 | if total and total > 0: |
| 517 | return int(total) |
| 518 | |
| 519 | # Try config |
| 520 | config = model_data.get("config", {}) or {} |
| 521 | # Estimate from hidden_size and num_layers if available |
| 522 | hidden = config.get("hidden_size", 0) |
| 523 | layers = config.get("num_hidden_layers", 0) |
| 524 | vocab = config.get("vocab_size", 0) |
| 525 | if hidden and layers and vocab: |
| 526 | # Rough: 12 * layers * hidden^2 + vocab * hidden * 2 |
| 527 | return 12 * layers * hidden * hidden + vocab * hidden * 2 |
| 528 | |
| 529 | # Fall back to ID-based hints — these are the recourse when HF doesn't |
| 530 | # index safetensors metadata for a repo (e.g. Qwen3-32B, phi-4, Mistral |
| 531 | # Small 3.2 24B). Without this branch these popular models silently |
| 532 | # disappear from the ranker. |
| 533 | # |
| 534 | # ``_KNOWN_PARAM_COUNTS`` is checked *before* the name hint because it |
| 535 | # is curated: for Llama-4-Scout-17B-16E (16-expert MoE) the name hint |
| 536 | # gives 17B (the active size) but the actual VRAM footprint is 109B. |
| 537 | known = _lookup_curated_count(_KNOWN_PARAM_COUNTS, model_id) |
| 538 | if known and known > 0: |
no test coverage detected