Check whether a model and (optional) GGUF variant can run on the given hardware.
check_compatibility(
model: ModelInfo,
variant: GGUFVariant | None,
hardware: HardwareInfo,
context_length: int = 4096,
) -> CompatibilityResult
| 116 | |
| 117 | |
| 118 | def check_compatibility( |
| 119 | model: ModelInfo, |
| 120 | variant: GGUFVariant | None, |
| 121 | hardware: HardwareInfo, |
| 122 | context_length: int = 4096, |
| 123 | ) -> CompatibilityResult: |
| 124 | """Check if a model+variant can run on the given hardware.""" |
| 125 | warnings: list[str] = [] |
| 126 | |
| 127 | vram_required = estimate_vram(model, variant, context_length) |
| 128 | |
| 129 | usable_ram = effective_usable_ram(hardware.ram_bytes, hardware.ram_budget_bytes) |
| 130 | |
| 131 | # Determine best GPU |
| 132 | best_gpu: GPUInfo | None = None |
| 133 | best_gpu_available = 0 |
| 134 | gpu_available_values: list[int] = [] |
| 135 | candidate_gpus = _fit_candidate_gpus(hardware.gpus) |
| 136 | ram_budget_active = hardware.ram_budget_bytes is not None |
| 137 | for gpu in candidate_gpus: |
| 138 | gpu_available = _gpu_available_memory( |
| 139 | gpu, usable_ram, ram_budget_active=ram_budget_active |
| 140 | ) |
| 141 | gpu_available_values.append(gpu_available) |
| 142 | if best_gpu is None or gpu_available > best_gpu_available: |
| 143 | best_gpu = gpu |
| 144 | best_gpu_available = gpu_available |
| 145 | |
| 146 | vram_available = sum(gpu_available_values) if gpu_available_values else 0 |
| 147 | fit_vram_available, uses_multi_gpu, multi_gpu_effective_vram = ( |
| 148 | _multi_gpu_effective_vram(candidate_gpus, gpu_available_values, warnings) |
| 149 | ) |
| 150 | if ( |
| 151 | len(candidate_gpus) > 1 |
| 152 | and not uses_multi_gpu |
| 153 | and any(gpu.shared_memory or gpu.vendor == "apple" for gpu in candidate_gpus) |
| 154 | ): |
| 155 | vram_available = fit_vram_available |
| 156 | offload_ram_available = ( |
| 157 | 0 |
| 158 | if best_gpu and (best_gpu.shared_memory or best_gpu.vendor == "apple") |
| 159 | else usable_ram |
| 160 | ) |
| 161 | |
| 162 | # Check compute capability for NVIDIA |
| 163 | if best_gpu and best_gpu.vendor == "nvidia" and best_gpu.compute_capability: |
| 164 | if best_gpu.compute_capability < MIN_COMPUTE_CAPABILITY_OLLAMA: |
| 165 | warnings.append( |
| 166 | f"Compute capability {best_gpu.compute_capability} is below " |
| 167 | f"minimum {MIN_COMPUTE_CAPABILITY_OLLAMA} for Ollama" |
| 168 | ) |
| 169 | |
| 170 | # Flag legacy Kepler GPUs that have no CUDA support in modern llama.cpp. |
| 171 | # They can still run, but only through the Vulkan backend on Linux. |
| 172 | if best_gpu and _is_vulkan_only_gpu(best_gpu): |
| 173 | warnings.append( |
| 174 | "Legacy Kepler GPU: no CUDA support in modern llama.cpp; " |
| 175 | "use the Vulkan backend (Linux) instead" |