(partition_book, rank, part_eles)
| 1777 | |
| 1778 | |
def _split_random_within_part(partition_book, rank, part_eles):
    """Pick this trainer's disjoint random share of a partition's elements.

    If more than one client (trainer) works on a partition, each client
    must get its own subset of the partition's elements, and the subsets of
    different clients must be disjoint. Every client permutes the elements
    with the same partition-derived seed and then takes its own slice of
    that permutation.

    Parameters
    ----------
    partition_book
        Partition book; its ``num_partitions()`` and ``partid`` are read.
    rank : int or None
        Rank of the trainer. If None, ``role.get_trainer_rank()`` is used.
    part_eles
        The elements of the local partition to split among the clients.

    Returns
    -------
    ``part_eles`` itself when there is exactly one client per partition;
    otherwise the rows of ``part_eles`` gathered at this client's selected
    positions (positions are passed through ``F.sort_1d`` before gathering).

    Raises
    ------
    AssertionError
        If ``rank`` is not smaller than the number of trainers.
    """
    num_clients = role.get_num_trainers()
    num_client_per_part = num_clients // partition_book.num_partitions()
    if num_client_per_part == 1:
        # A single client owns the whole partition; nothing to split.
        return part_eles
    if rank is None:
        rank = role.get_trainer_rank()
    assert (
        rank < num_clients
    ), "The input rank ({}) is incorrect. #Trainers: {}".format(
        rank, num_clients
    )
    client_id_in_part = rank % num_client_per_part
    # offset[i] and offset[i + 1] are used below as the slice boundaries of
    # the i-th client inside the shared permutation.
    offset = _even_offset(len(part_eles), num_client_per_part)

    # Seed with the partition ID so that every process (client) in a
    # partition permutes the elements in the same way, which guarantees that
    # the slices taken by different clients are disjoint.
    #
    # A private RandomState is used instead of np.random.seed() so that the
    # process-wide NumPy RNG state is left untouched; for the same seed it
    # produces the same permutation as the seeded global generator.
    rng = np.random.RandomState(partition_book.partid)
    rand_idx = rng.permutation(len(part_eles))
    rand_idx = rand_idx[
        offset[client_id_in_part] : offset[client_id_in_part + 1]
    ]
    idx, _ = F.sort_1d(F.tensor(rand_idx))
    return F.gather_row(part_eles, idx)
| 1808 | |
| 1809 | |
| 1810 | def _split_by_trainer_id( |
no test coverage detected