Profile the bidirectional communication bandwidth between CPU and GPU using data volumes ranging from 1KB to 1GB.
(self)
| 162 | raise TypeError(f"Unknown NVIDIA GPU device name {device_name}") |
| 163 | |
def _profile_bandwidth(self):
    """
    Measure CPU<->GPU copy bandwidth over a sweep of transfer sizes.

    For each direction ("h2d" first, then "d2h") a pair of int8 tensors is
    allocated -- one in pinned host memory, one on the "cuda" device -- for
    every size from 2**10 elements (1KB) up to 2**30 elements (1GB),
    doubling at each step. The copy ``dst.copy_(src)`` is handed to
    ``benchmark_func`` with ``number=5, repeat=3``.

    Returns:
        dict: ``{link: {size_in_bytes: bandwidth}}`` where ``bandwidth`` is
        the size divided by the value returned by ``benchmark_func``. The
        progress line reports this value as GB/s after dividing by
        ``1024 ** 3``.
    """

    print("profiling bandwidth ......")

    # Keyword arguments placing the (source, destination) buffers for each
    # transfer direction; dict order fixes the order directions are measured.
    placements = {
        "h2d": ({"pin_memory": True}, {"device": "cuda"}),
        "d2h": ({"device": "cuda"}, {"pin_memory": True}),
    }

    results = {}
    for direction, (src_where, dst_where) in placements.items():
        per_size = {}

        # 21 doubling steps: 2**10 bytes (1KB) through 2**30 bytes (1GB).
        for exponent in range(10, 31):
            num_bytes = 2 ** exponent
            source = torch.ones(num_bytes, dtype=torch.int8, **src_where)
            target = torch.ones(num_bytes, dtype=torch.int8, **dst_where)

            def transfer():
                target.copy_(source)

            elapsed = benchmark_func(transfer, number=5, repeat=3)
            bandwidth = num_bytes / elapsed
            per_size[num_bytes] = bandwidth

            print(
                f"size: {num_bytes / 1024 ** 2:.3f} MB, "
                f"{source.device.type}-to-{target.device.type} "
                f"bandwidth: {bandwidth / 1024 ** 3:.3f} GB/s"
            )

        results[direction] = per_size

    return results
| 201 | |
| 202 | |
| 203 | class SynGreedySolver(Solver): |
no test coverage detected