Function _multi_gpu_effective_vram

src/whichllm/engine/compatibility.py:80–115 · view source on GitHub ↗

(
    gpus: list[GPUInfo],
    available: list[int],
    warnings: list[str],
)

Source from the content-addressed store, hash-verified

78
79
80	def _multi_gpu_effective_vram(
81	gpus: list[GPUInfo],
82	available: list[int],
83	warnings: list[str],
84	) -> tuple[int, bool, int \| None]:
85	raw_total = sum(available)
86	if len(gpus) <= 1:
87	return raw_total, False, None
88
89	if any(gpu.shared_memory or gpu.vendor == "apple" for gpu in gpus):
90	effective = max(available)
91	warnings.append(
92	"Multiple shared-memory GPUs are not pooled; using the largest "
93	"reported memory pool for fit checks"
94	)
95	return effective, False, None
96
97	homogeneous = _is_homogeneous_gpu_set(gpus, available)
98	utilization = (
99	_MULTI_GPU_HOMOGENEOUS_UTILIZATION
100	if homogeneous
101	else _MULTI_GPU_HETEROGENEOUS_UTILIZATION
102	)
103	overhead = min(raw_total, len(gpus) * _MULTI_GPU_FRAMEWORK_OVERHEAD_BYTES)
104	effective = int((raw_total - overhead) * utilization)
105
106	warnings.append(
107	"Multi-GPU fit uses a conservative layer-split budget: "
108	f"{effective / _GiB:.1f} GB effective from {raw_total / _GiB:.1f} GB raw VRAM"
109	)
110	if not homogeneous:
111	warnings.append(
112	"Heterogeneous multi-GPU setup: fit assumes uneven layer placement; "
113	"speed depends on backend split mode and interconnect"
114	)
115	return effective, True, effective
116
117
118	def check_compatibility(

check_compatibilityFunction · 0.85

_is_homogeneous_gpu_setFunction · 0.85

no test coverage detected