(comm_op, size, duration)
| 32 | # Helper function to calculate algbw and busbw. |
| 33 | # See https://gist.github.com/jeffra/b5e80466b4c86be00ea3b6f130fb7a36 and https://github.com/NVIDIA/nccl-tests/blob/master/doc/PERFORMANCE.md |
def calc_bw_log(comm_op, size, duration):
    """Compute algorithm bandwidth and bus bandwidth for one communication op.

    The per-op scaling factors follow the conventions described in
    https://gist.github.com/jeffra/b5e80466b4c86be00ea3b6f130fb7a36 and
    https://github.com/NVIDIA/nccl-tests/blob/master/doc/PERFORMANCE.md

    Args:
        comm_op: Name of the communication op, e.g. ``"all_reduce"`` or
            ``"broadcast"``.
        size: Message size.
            NOTE(review): presumably bytes -- confirm against callers.
        duration: Elapsed time of the op. A zero duration is not guarded
            against.
            NOTE(review): presumably milliseconds -- the ``* 8 / 1e6``
            scaling below yields Gbps only for bytes per millisecond.

    Returns:
        A ``(tput, busbw)`` tuple: the algorithm throughput and the bus
        bandwidth, both after the same unit conversion.

    Raises:
        SystemExit: With status 1 when ``comm_op`` is not a recognised op.
    """
    import deepspeed.comm as dist

    # Ops whose message size is scaled by the world size before computing.
    gather_like_ops = (
        "all_gather",
        "all_gather_into_tensor",
        "reduce_scatter",
        "reduce_scatter_tensor",
    )
    all_reduce_ops = (
        "all_reduce",
        "all_reduce_coalesced",
        "inference_all_reduce",
    )
    # Ops for which bus bandwidth is reported equal to the throughput.
    unscaled_ops = (
        "send",
        "recv",
        "isend",
        "irecv",
        "broadcast",
        "reduce",
        "gather",
        "scatter",
        "barrier",
    )

    n = dist.get_world_size()

    if comm_op == "all_to_all_single":
        tput = size / duration
        busbw = (size / duration) * ((n - 1) / n)
    elif comm_op in gather_like_ops:
        size *= n
        tput = size / duration
        busbw = (size / duration) * ((n - 1) / n)
    elif comm_op in all_reduce_ops:
        tput = size * 2 / duration
        busbw = (size / duration) * (2 * (n - 1) / n)
    elif comm_op in unscaled_ops:
        tput = size / duration
        busbw = tput
    else:
        # NOTE(review): print_rank_0 is not defined in the visible part of
        # this file (hence the lint suppression); confirm it is in scope,
        # otherwise this line raises NameError before the exit below.
        print_rank_0("wrong comm_op specified")  # noqa: F821
        # An unsupported op is an error, so terminate with a non-zero status
        # rather than reporting success to the launcher.
        raise SystemExit(1)

    # convert to Gbps
    tput *= 8
    busbw *= 8

    tput /= 1e6
    busbw /= 1e6

    return tput, busbw
| 65 | |
| 66 | |
| 67 | class CommsLogger: |
no test coverage detected
searching dependent graphs…