Perform a robust GPU cleanup sequence. This includes synchronizing, emptying caches, collecting IPC handles and running the Python garbage collector. Use this instead of a raw ``torch.cuda.empty_cache()`` where you need reliable freeing of GPU memory between model loads or in error handling paths.
()
| 40 | |
| 41 | |
def cleanup_cuda_memory() -> None:
    """Perform a robust GPU cleanup sequence.

    Runs the Python garbage collector, synchronizes the device, empties the
    CUDA caching allocator and collects IPC handles. Use this instead of a
    raw ``torch.cuda.empty_cache()`` where you need reliable freeing of GPU
    memory between model loads or in error handling paths.

    The function is best-effort: it does nothing when CUDA is unavailable,
    and any exception raised during cleanup is reported on stdout as a
    warning instead of being propagated to the caller.
    """
    try:
        if torch.cuda.is_available():
            mem_before = get_gpu_memory_info()

            # Run the garbage collector *before* emptying the cache.
            # ``empty_cache`` only releases blocks that are unoccupied, so
            # tensors kept alive solely by reference cycles must be
            # collected first or their memory stays reserved.
            gc.collect()

            # Wait for pending kernels so no in-flight work still holds
            # blocks that are about to be released.
            torch.cuda.synchronize()
            torch.cuda.empty_cache()

            # Collect cross-process CUDA resources. Deliberately
            # best-effort: older PyTorch versions or non-CUDA devices may
            # not support ``ipc_collect``, and a failure here must not
            # abort the rest of the cleanup or the memory report.
            try:
                torch.cuda.ipc_collect()
            except Exception:
                pass

            mem_after = get_gpu_memory_info()
            if mem_before and mem_after:
                freed = mem_before["reserved_gb"] - mem_after["reserved_gb"]
                print(
                    f"CUDA cleanup: freed {freed:.2f}GB, "
                    f"available: {mem_after['free_gb']:.2f}GB/{mem_after['total_gb']:.2f}GB"
                )
            else:
                # Memory statistics were unavailable; report completion only.
                print("CUDA memory cleanup completed")
    except Exception as e:
        # Cleanup is frequently called from error-handling paths, so it must
        # never raise itself.
        print(f"Warning: CUDA cleanup failed: {e}")
| 76 | |
| 77 | |
| 78 | def check_memory_availability(required_gb: float = 2.0) -> tuple[bool, str]: |
no test coverage detected