Function estimate_weight_bytes

src/whichllm/engine/quantization.py:68–74 · view source on GitHub ↗

Estimate model weight size in bytes.

(model: ModelInfo, variant: GGUFVariant | None)

Source from the content-addressed store, hash-verified

66
67
def estimate_weight_bytes(model: ModelInfo, variant: GGUFVariant | None) -> int:
    """Estimate model weight size in bytes.

    Args:
        model: Model whose ``id`` is used to infer the non-GGUF quantization
            type and whose ``parameter_count`` scales the estimate when no
            variant is supplied.
        variant: GGUF variant to size, or ``None`` when the model is not
            being evaluated as a GGUF file.

    Returns:
        ``variant.file_size_bytes`` when a variant is supplied. Otherwise
        ``model.parameter_count`` multiplied by the bytes-per-weight value
        looked up for the inferred quantization type, truncated to ``int``.
    """
    # Test for None explicitly: the parameter is Optional, and truthiness
    # would send a variant object that evaluates falsy down the heuristic path.
    if variant is not None:
        return variant.file_size_bytes

    quant_type = infer_non_gguf_quant_type(model.id)
    # Quantization types missing from the table fall back to 2.0 bytes per
    # weight (presumably 16-bit weights -- TODO confirm against the table).
    bytes_per_weight = _NON_GGUF_BYTES_PER_WEIGHT.get(quant_type, 2.0)
    return int(model.parameter_count * bytes_per_weight)
75
76
77	def quant_quality_penalty(model: ModelInfo, variant: GGUFVariant \| None) -> float:

display_json — Function · 0.90

check_compatibility — Function · 0.90

estimate_tok_per_sec — Function · 0.90

estimate_vram — Function · 0.90

test_estimate_weight_bytes_for_awq — Function · 0.90

test_estimate_weight_bytes_for_fp4_formats — Function · 0.90

infer_non_gguf_quant_type — Function · 0.85

test_estimate_weight_bytes_for_awq — Function · 0.72

test_estimate_weight_bytes_for_fp4_formats — Function · 0.72