MCPcopy
hub / github.com/Andyyyy64/whichllm / _multi_gpu_effective_vram

Function _multi_gpu_effective_vram

src/whichllm/engine/compatibility.py:80–115  ·  view source on GitHub ↗
(
    gpus: list[GPUInfo],
    available: list[int],
    warnings: list[str],
)

Source from the content-addressed store, hash-verified

78
79
def _multi_gpu_effective_vram(
    gpus: list[GPUInfo],
    available: list[int],
    warnings: list[str],
) -> tuple[int, bool, int | None]:
    """Estimate the memory budget usable for fit checks across ``gpus``.

    Three cases are handled:

    * Zero or one GPU: the plain sum of ``available`` is returned unchanged.
    * Any GPU flagged ``shared_memory`` or with vendor ``"apple"``: pools are
      not added together; the largest single entry of ``available`` is used.
    * Otherwise: a per-GPU framework overhead is subtracted from the summed
      memory and the remainder is scaled by a utilization factor chosen by
      whether ``_is_homogeneous_gpu_set`` reports the set as homogeneous.

    Args:
        gpus: Detected GPUs.
        available: Memory available per GPU (presumably bytes, given the
            ``_BYTES`` overhead constant and the ``_GiB`` divisor -- confirm
            against the caller).
        warnings: Mutated in place; explanatory messages are appended.

    Returns:
        A ``(effective, split_applied, split_budget)`` tuple. ``split_applied``
        is ``True`` only when the multi-GPU layer-split budget was computed,
        in which case ``split_budget`` equals ``effective``; otherwise
        ``split_budget`` is ``None``.
    """
    raw_total = sum(available)
    if len(gpus) <= 1:
        return raw_total, False, None

    if any(gpu.shared_memory or gpu.vendor == "apple" for gpu in gpus):
        # default=0 keeps an empty ``available`` from raising ValueError, so
        # this branch reports a zero budget just like the discrete-GPU branch
        # below does for the same input.
        effective = max(available, default=0)
        warnings.append(
            "Multiple shared-memory GPUs are not pooled; using the largest "
            "reported memory pool for fit checks"
        )
        return effective, False, None

    homogeneous = _is_homogeneous_gpu_set(gpus, available)
    utilization = (
        _MULTI_GPU_HOMOGENEOUS_UTILIZATION
        if homogeneous
        else _MULTI_GPU_HETEROGENEOUS_UTILIZATION
    )
    # Clamp the overhead to the raw total so the budget never goes negative.
    overhead = min(raw_total, len(gpus) * _MULTI_GPU_FRAMEWORK_OVERHEAD_BYTES)
    effective = int((raw_total - overhead) * utilization)

    warnings.append(
        "Multi-GPU fit uses a conservative layer-split budget: "
        f"{effective / _GiB:.1f} GB effective from {raw_total / _GiB:.1f} GB raw VRAM"
    )
    if not homogeneous:
        warnings.append(
            "Heterogeneous multi-GPU setup: fit assumes uneven layer placement; "
            "speed depends on backend split mode and interconnect"
        )
    return effective, True, effective
116
117
118def check_compatibility(

Callers 1

check_compatibilityFunction · 0.85

Calls 1

_is_homogeneous_gpu_setFunction · 0.85

Tested by

no test coverage detected