MCPcopy
hub / github.com/Andyyyy64/whichllm / check_compatibility

Function check_compatibility

src/whichllm/engine/compatibility.py:118–261  ·  view source on GitHub ↗

Check if a model+variant can run on the given hardware.

(
    model: ModelInfo,
    variant: GGUFVariant | None,
    hardware: HardwareInfo,
    context_length: int = 4096,
)

Source from the content-addressed store, hash-verified

116
117
118def check_compatibility(
119 model: ModelInfo,
120 variant: GGUFVariant | None,
121 hardware: HardwareInfo,
122 context_length: int = 4096,
123) -> CompatibilityResult:
124 """Check if a model+variant can run on the given hardware."""
125 warnings: list[str] = []
126
127 vram_required = estimate_vram(model, variant, context_length)
128
129 usable_ram = effective_usable_ram(hardware.ram_bytes, hardware.ram_budget_bytes)
130
131 # Determine best GPU
132 best_gpu: GPUInfo | None = None
133 best_gpu_available = 0
134 gpu_available_values: list[int] = []
135 candidate_gpus = _fit_candidate_gpus(hardware.gpus)
136 ram_budget_active = hardware.ram_budget_bytes is not None
137 for gpu in candidate_gpus:
138 gpu_available = _gpu_available_memory(
139 gpu, usable_ram, ram_budget_active=ram_budget_active
140 )
141 gpu_available_values.append(gpu_available)
142 if best_gpu is None or gpu_available > best_gpu_available:
143 best_gpu = gpu
144 best_gpu_available = gpu_available
145
146 vram_available = sum(gpu_available_values) if gpu_available_values else 0
147 fit_vram_available, uses_multi_gpu, multi_gpu_effective_vram = (
148 _multi_gpu_effective_vram(candidate_gpus, gpu_available_values, warnings)
149 )
150 if (
151 len(candidate_gpus) > 1
152 and not uses_multi_gpu
153 and any(gpu.shared_memory or gpu.vendor == "apple" for gpu in candidate_gpus)
154 ):
155 vram_available = fit_vram_available
156 offload_ram_available = (
157 0
158 if best_gpu and (best_gpu.shared_memory or best_gpu.vendor == "apple")
159 else usable_ram
160 )
161
162 # Check compute capability for NVIDIA
163 if best_gpu and best_gpu.vendor == "nvidia" and best_gpu.compute_capability:
164 if best_gpu.compute_capability < MIN_COMPUTE_CAPABILITY_OLLAMA:
165 warnings.append(
166 f"Compute capability {best_gpu.compute_capability} is below "
167 f"minimum {MIN_COMPUTE_CAPABILITY_OLLAMA} for Ollama"
168 )
169
170 # Flag legacy Kepler GPUs that have no CUDA support in modern llama.cpp.
171 # They can still run, but only through the Vulkan backend on Linux.
172 if best_gpu and _is_vulkan_only_gpu(best_gpu):
173 warnings.append(
174 "Legacy Kepler GPU: no CUDA support in modern llama.cpp; "
175 "use the Vulkan backend (Linux) instead"

Calls 8

estimate_vram — Function · 0.90
effective_usable_ram — Function · 0.90
estimate_weight_bytes — Function · 0.90
CompatibilityResult — Class · 0.90
_fit_candidate_gpus — Function · 0.85
_gpu_available_memory — Function · 0.85
_is_vulkan_only_gpu — Function · 0.85