(
gpus: list[GPUInfo],
available: list[int],
warnings: list[str],
)
| 78 | |
| 79 | |
def _multi_gpu_effective_vram(
    gpus: list[GPUInfo],
    available: list[int],
    warnings: list[str],
) -> tuple[int, bool, int | None]:
    """Work out the memory budget to use for fit checks across ``gpus``.

    Args:
        gpus: Detected GPUs; each entry is read for ``shared_memory`` and
            ``vendor``.
        available: Available memory per GPU (presumably bytes, given the
            ``_BYTES`` overhead constant and the GiB conversion below —
            confirm against the caller).
        warnings: List that is appended to in place with human-readable
            notes describing how the budget was derived.

    Returns:
        A 3-tuple ``(effective, layer_split, multi_gpu_effective)``:

        * With zero or one GPU: the plain sum of ``available``, ``False``,
          ``None``; no warning is added.
        * If any GPU reports shared memory or has vendor ``"apple"``: the
          largest single entry of ``available``, ``False``, ``None``.
        * Otherwise: the summed memory minus a per-GPU framework overhead,
          scaled by a utilization factor chosen by homogeneity, ``True``,
          and that same effective value again.
    """
    pooled = sum(available)
    gpu_count = len(gpus)

    # Nothing to split with zero or one GPU: report the raw sum untouched.
    if gpu_count <= 1:
        return pooled, False, None

    # Shared-memory devices are not added together; only the biggest
    # reported pool is used for the fit check.
    has_shared_pool = any(
        gpu.shared_memory or gpu.vendor == "apple" for gpu in gpus
    )
    if has_shared_pool:
        largest_pool = max(available)
        warnings.append(
            "Multiple shared-memory GPUs are not pooled; using the largest "
            "reported memory pool for fit checks"
        )
        return largest_pool, False, None

    is_uniform = _is_homogeneous_gpu_set(gpus, available)
    if is_uniform:
        usable_fraction = _MULTI_GPU_HOMOGENEOUS_UTILIZATION
    else:
        usable_fraction = _MULTI_GPU_HETEROGENEOUS_UTILIZATION

    # Reserve a fixed framework overhead per GPU, clamped so that the
    # remaining budget can never go negative.
    reserved = min(pooled, gpu_count * _MULTI_GPU_FRAMEWORK_OVERHEAD_BYTES)
    budget = int((pooled - reserved) * usable_fraction)

    budget_gib = budget / _GiB
    pooled_gib = pooled / _GiB
    warnings.append(
        "Multi-GPU fit uses a conservative layer-split budget: "
        f"{budget_gib:.1f} GB effective from {pooled_gib:.1f} GB raw VRAM"
    )
    if not is_uniform:
        warnings.append(
            "Heterogeneous multi-GPU setup: fit assumes uneven layer placement; "
            "speed depends on backend split mode and interconnect"
        )
    return budget, True, budget
| 116 | |
| 117 | |
| 118 | def check_compatibility( |
no test coverage detected