MCPcopy
hub / github.com/hpcaitech/ColossalAI / _profile_bandwidth

Method _profile_bandwidth

colossalai/auto_parallel/offload/solver.py:164–200  ·  view source on GitHub ↗

Profile the bidirectional communication bandwidth between CPU and GPU using data volumes ranging from 1KB to 1GB.

(self)

Source from the content-addressed store, hash-verified

162 raise TypeError(f"Unknown NVIDIA GPU device name {device_name}")
163
def _profile_bandwidth(self):
    """
    Measure CPU<->GPU copy bandwidth in both directions.

    For each link ("h2d": pinned host tensor -> CUDA tensor, "d2h": CUDA
    tensor -> pinned host tensor) an int8 tensor is copied at 21 sizes,
    doubling from 1KB up to 1GB. Each copy is timed through
    ``benchmark_func`` and the result is printed as it is measured.

    Returns:
        dict: ``{link: {size_in_bytes: bandwidth}}`` where ``link`` is
        "h2d" or "d2h" and ``bandwidth`` is the size divided by the value
        returned by ``benchmark_func`` (the printout treats it as bytes
        per second -- presumably ``benchmark_func`` returns seconds per
        call; confirm against its definition).
    """

    print("profiling bandwidth ......")

    # Construction options for the two endpoints of a transfer.
    pinned_host = {"dtype": torch.int8, "pin_memory": True}
    on_device = {"dtype": torch.int8, "device": "cuda"}

    # link name -> (source tensor options, destination tensor options)
    endpoints = {
        "h2d": (pinned_host, on_device),
        "d2h": (on_device, pinned_host),
    }

    link_to_bandwidth = {}

    for link, (src_opts, dst_opts) in endpoints.items():
        size_to_bandwidth = {}

        # from 1KB to 1GB: 1024 * 2**0 ... 1024 * 2**20 bytes
        for exponent in range(21):
            num_bytes = 1024 << exponent

            # int8 elements are one byte each, so element count == bytes moved.
            src = torch.ones(num_bytes, **src_opts)
            dst = torch.ones(num_bytes, **dst_opts)

            def transfer():
                dst.copy_(src)

            elapsed = benchmark_func(transfer, number=5, repeat=3)
            bandwidth = num_bytes / elapsed
            size_to_bandwidth[num_bytes] = bandwidth

            print(
                f"size: {num_bytes / 1024 ** 2:.3f} MB, "
                f"{src.device.type}-to-{dst.device.type} "
                f"bandwidth: {bandwidth / 1024 ** 3:.3f} GB/s"
            )

        link_to_bandwidth[link] = size_to_bandwidth

    return link_to_bandwidth
201
202
203class SynGreedySolver(Solver):

Callers 1

__init__Method · 0.95

Calls 1

benchmark_funcFunction · 0.85

Tested by

no test coverage detected