Function calc_bw_log

deepspeed/utils/comms_logging.py:34–64 · view source on GitHub ↗

(comm_op, size, duration)

Source from the content-addressed store, hash-verified

32	# Helper function to calculate algbw and busbw.
33	# See https://gist.github.com/jeffra/b5e80466b4c86be00ea3b6f130fb7a36 and https://github.com/NVIDIA/nccl-tests/blob/master/doc/PERFORMANCE.md
def calc_bw_log(comm_op, size, duration, world_size=None):
    """Compute the algorithm and bus bandwidth of one communication op.

    The algorithm bandwidth is the message volume divided by the elapsed
    time; the bus bandwidth rescales it by a factor that depends on the
    collective and on the number of participating ranks.

    Args:
        comm_op: Name of the communication op, e.g. ``"all_reduce"``.
        size: Message size. For the all-gather / reduce-scatter family it is
            multiplied by the world size before the bandwidth is computed.
        duration: Elapsed time of the op. Must be non-zero.
        world_size: Optional number of ranks. When ``None`` (the default) it
            is read from ``deepspeed.comm.get_world_size()``; passing it
            explicitly lets the calculation run without the comm backend.

    Returns:
        A ``(tput, busbw)`` tuple, each scaled by ``8 / 1e6``.
        NOTE(review): that scaling yields Gbps only if ``size`` is in bytes
        and ``duration`` in milliseconds -- confirm against the callers.

    Raises:
        ZeroDivisionError: If ``duration`` (or the world size) is zero.
        SystemExit: If ``comm_op`` is not a recognized op name.
    """
    if world_size is None:
        # Function-scope import, as in the original -- presumably to avoid a
        # circular import at module load time (TODO confirm).
        import deepspeed.comm as dist

        n = dist.get_world_size()
    else:
        n = world_size

    tput = 0
    busbw = 0
    if comm_op == "all_to_all_single":
        tput = size / duration
        busbw = (size / duration) * ((n - 1) / n)
    elif comm_op in ("all_gather", "all_gather_into_tensor", "reduce_scatter", "reduce_scatter_tensor"):
        # The size is scaled by the world size for these ops.
        size *= n
        tput = size / duration
        busbw = (size / duration) * ((n - 1) / n)
    elif comm_op in ("all_reduce", "all_reduce_coalesced", "inference_all_reduce"):
        tput = size * 2 / duration
        busbw = (size / duration) * (2 * (n - 1) / n)
    elif comm_op in ("send", "recv", "isend", "irecv", "broadcast", "reduce", "gather", "scatter", "barrier"):
        # No world-size correction for these ops: bus bandwidth equals tput.
        tput = size / duration
        busbw = tput
    else:
        print_rank_0("wrong comm_op specified")  # noqa: F821
        # NOTE(review): exits with status 0 although this is an error path;
        # kept as-is so existing process-level behavior does not change.
        exit(0)

    # convert to Gbps
    tput *= 8
    busbw *= 8

    tput /= 1e6
    busbw /= 1e6

    return tput, busbw
65
66
67	class CommsLogger:

appendMethod · 0.85

get_world_sizeMethod · 0.80

print_rank_0Function · 0.70

no test coverage detected

searching dependent graphs…