Return a reusable buffer view for the requested shape/dtype. The returned tensor is backed by an underlying `torch.uint8` buffer. When no suitable buffer exists in the pool, a new tensor is created via `torch.empty`, so its contents are uninitialized. Overwrite the data before use if needed.
(self, tensor_shape: list[int], dtype: torch.dtype,
buffer_name: str, reserve_buffer: bool)
| 50 | target_shape) |
| 51 | |
| 52 | def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype, |
| 53 | buffer_name: str, reserve_buffer: bool): |
| 54 | """Return a reusable buffer view for the requested shape/dtype. |
| 55 | The returned tensor is backed by an underlying `torch.uint8` buffer. When |
| 56 | no suitable buffer exists in the pool, a new tensor is created via |
| 57 | `torch.empty`, so its contents are uninitialized. Overwrite the data before use if needed. |
| 58 | """ |
| 59 | |
| 60 | # all buffers are allocated with 1 byte element size |
| 61 | required_memory_size = math.prod(tensor_shape) * dtype.itemsize |
| 62 | |
| 63 | candidate_blocks = self.buffers.get(buffer_name, []) |
| 64 | |
| 65 | # Find the best-fit available buffer. |
| 66 | best_fit_block: Optional[BufferBlock] = None |
| 67 | smallest_sufficient_size = float('inf') |
| 68 | for block in candidate_blocks: |
| 69 | # Skip buffers that are too small. |
| 70 | if block.buffer.numel() < required_memory_size: |
| 71 | continue |
| 72 | |
| 73 | # Find the smallest buffer that is still large enough (best-fit). |
| 74 | if block.buffer.numel() < smallest_sufficient_size: |
| 75 | # Use reserved block if find one. |
| 76 | if best_fit_block is not None and best_fit_block.is_reserved and not block.is_reserved: |
| 77 | continue |
| 78 | |
| 79 | best_fit_block = block |
| 80 | smallest_sufficient_size = block.buffer.numel() |
| 81 | |
| 82 | if best_fit_block is not None: |
| 83 | if reserve_buffer: |
| 84 | best_fit_block.is_reserved = True |
| 85 | # A suitable buffer was found, so reuse it. |
| 86 | return self._view_as(best_fit_block.buffer, tensor_shape, dtype) |
| 87 | |
| 88 | for block in list(candidate_blocks): |
| 89 | if not block.is_reserved: |
| 90 | # Need to call del BufferBlock.buffer, otherwise memory isn't |
| 91 | # released and OOM may happen. |
| 92 | buffer_size = block.buffer.numel() |
| 93 | del block.buffer |
| 94 | if buffer_size >= 1024 * 1024 * 1024: |
| 95 | torch.cuda.empty_cache() |
| 96 | candidate_blocks.remove(block) |
| 97 | |
| 98 | # No suitable buffer was found, so allocate a new one. |
| 99 | # The new buffer is created with uint8 to represent raw bytes. |
| 100 | new_buffer_tensor = None |
| 101 | try: |
| 102 | with torch.cuda.memory.use_mem_pool(get_shared_pool()): |
| 103 | new_buffer_tensor = torch.empty((required_memory_size, ), |
| 104 | device='cuda', |
| 105 | dtype=torch.uint8) |
| 106 | except Exception as ex: |
| 107 | # Need to check if this is an OOM exception |
| 108 | logger.debug( |
| 109 | f"Exception happened to create tensor from given memory pool: {str(ex)}" |
nothing calls this directly
no test coverage detected