Example - E + M + D parallel world_size = 16 model_degree = 2 expert_degree = 4 # number of experts in same group mp_group = [0, 1], [2,3], [4,5] ... data_parallel_group =[0,2,4,6,8,10, 12,14], [1,3,5,7,9,11,13,15] expert_parallel_group = [0,2,4,6], [8,10
()
| 7 | |
| 8 | |
def test_get_expert_parallel_ranks():
    """Check the expert (E) + model (M) + data (D) parallel rank layout.

    Scenario exercised here:
        world_size           = 16
        tensor/model degree  = 2
        expert degree        = 4   (ranks per expert-parallel group)

    For reference, the surrounding layout in this scenario is:
        mp_group                   = [0, 1], [2, 3], [4, 5], ...
        data_parallel_group        = [0, 2, 4, 6, 8, 10, 12, 14],
                                     [1, 3, 5, 7, 9, 11, 13, 15]
        expert_parallel_group      = [0, 2, 4, 6], [8, 10, 12, 14],
                                     [1, 3, 5, 7], [9, 11, 13, 15]
        expert_data_parallel_group = [0, 8], [2, 10], [4, 12], [6, 14],
                                     [1, 9], [3, 11], [5, 13], [7, 15]
    """
    # Expected groups, listed in the exact order the helper must return them.
    want_ep_groups = [[0, 2, 4, 6], [8, 10, 12, 14], [1, 3, 5, 7], [9, 11, 13, 15]]
    want_edp_groups = [[0, 8], [2, 10], [4, 12], [6, 14], [1, 9], [3, 11], [5, 13], [7, 15]]

    ep_groups, edp_groups = _get_expert_parallel_ranks(
        world_size=16,
        tensor_parallel_size_=2,
        expert_parallel_size_=4,
    )

    assert ep_groups == want_ep_groups
    assert edp_groups == want_edp_groups
nothing calls this directly
no test coverage detected
searching dependent graphs…