hub / github.com/Andyyyy64/whichllm / check_compatibility

Function check_compatibility

src/whichllm/engine/compatibility.py:118–261 · view source on GitHub ↗

Check if a model+variant can run on the given hardware.

(
    model: ModelInfo,
    variant: GGUFVariant | None,
    hardware: HardwareInfo,
    context_length: int = 4096,
)

Source from the content-addressed store, hash-verified

116
117
118	def check_compatibility(
119	model: ModelInfo,
120	variant: GGUFVariant \| None,
121	hardware: HardwareInfo,
122	context_length: int = 4096,
123	) -> CompatibilityResult:
124	"""Check if a model+variant can run on the given hardware."""
125	warnings: list[str] = []
126
127	vram_required = estimate_vram(model, variant, context_length)
128
129	usable_ram = effective_usable_ram(hardware.ram_bytes, hardware.ram_budget_bytes)
130
131	# Determine best GPU
132	best_gpu: GPUInfo \| None = None
133	best_gpu_available = 0
134	gpu_available_values: list[int] = []
135	candidate_gpus = _fit_candidate_gpus(hardware.gpus)
136	ram_budget_active = hardware.ram_budget_bytes is not None
137	for gpu in candidate_gpus:
138	gpu_available = _gpu_available_memory(
139	gpu, usable_ram, ram_budget_active=ram_budget_active
140	)
141	gpu_available_values.append(gpu_available)
142	if best_gpu is None or gpu_available > best_gpu_available:
143	best_gpu = gpu
144	best_gpu_available = gpu_available
145
146	vram_available = sum(gpu_available_values) if gpu_available_values else 0
147	fit_vram_available, uses_multi_gpu, multi_gpu_effective_vram = (
148	_multi_gpu_effective_vram(candidate_gpus, gpu_available_values, warnings)
149	)
150	if (
151	len(candidate_gpus) > 1
152	and not uses_multi_gpu
153	and any(gpu.shared_memory or gpu.vendor == "apple" for gpu in candidate_gpus)
154	):
155	vram_available = fit_vram_available
156	offload_ram_available = (
157	0
158	if best_gpu and (best_gpu.shared_memory or best_gpu.vendor == "apple")
159	else usable_ram
160	)
161
162	# Check compute capability for NVIDIA
163	if best_gpu and best_gpu.vendor == "nvidia" and best_gpu.compute_capability:
164	if best_gpu.compute_capability < MIN_COMPUTE_CAPABILITY_OLLAMA:
165	warnings.append(
166	f"Compute capability {best_gpu.compute_capability} is below "
167	f"minimum {MIN_COMPUTE_CAPABILITY_OLLAMA} for Ollama"
168	)
169
170	# Flag legacy Kepler GPUs that have no CUDA support in modern llama.cpp.
171	# They can still run, but only through the Vulkan backend on Linux.
172	if best_gpu and _is_vulkan_only_gpu(best_gpu):
173	warnings.append(
174	"Legacy Kepler GPU: no CUDA support in modern llama.cpp; "
175	"use the Vulkan backend (Linux) instead"

Callers 15

rank_models Function · 0.90

test_full_gpu_fit Function · 0.90

test_partial_offload Function · 0.90

test_usable_vram_budget_can_turn_full_gpu_into_partial_offload Function · 0.90

test_ram_budget_limits_partial_offload_pool Function · 0.90

test_ram_budget_caps_shared_memory_gpu_fit_pool Function · 0.90

test_shared_memory_manual_vram_override_caps_available_gpu_memory Function · 0.90

test_shared_memory_amd_apu_uses_system_memory_pool Function · 0.90

test_windows_shared_memory_amd_apu_does_not_emit_rocm_warning Function · 0.90

test_shared_memory_igpu_is_not_summed_with_dedicated_gpu Function · 0.90

test_homogeneous_multi_gpu_uses_conservative_fit_budget Function · 0.90

test_heterogeneous_multi_gpu_warns_about_split_assumptions Function · 0.90

Calls 8

estimate_vram Function · 0.90

effective_usable_ram Function · 0.90

estimate_weight_bytes Function · 0.90

CompatibilityResult Class · 0.90

_fit_candidate_gpus Function · 0.85

_gpu_available_memory Function · 0.85

_multi_gpu_effective_vram Function · 0.85

_is_vulkan_only_gpu Function · 0.85

Tested by 15

test_full_gpu_fit Function · 0.72

test_partial_offload Function · 0.72

test_usable_vram_budget_can_turn_full_gpu_into_partial_offload Function · 0.72

test_ram_budget_limits_partial_offload_pool Function · 0.72

test_ram_budget_caps_shared_memory_gpu_fit_pool Function · 0.72

test_shared_memory_manual_vram_override_caps_available_gpu_memory Function · 0.72

test_shared_memory_amd_apu_uses_system_memory_pool Function · 0.72

test_windows_shared_memory_amd_apu_does_not_emit_rocm_warning Function · 0.72

test_shared_memory_igpu_is_not_summed_with_dedicated_gpu Function · 0.72

test_homogeneous_multi_gpu_uses_conservative_fit_budget Function · 0.72

test_heterogeneous_multi_gpu_warns_about_split_assumptions Function · 0.72

test_multiple_shared_memory_gpus_are_not_summed Function · 0.72