hub / github.com/dmlc/dgl / _split_random_within_part

Function _split_random_within_part

python/dgl/distributed/dist_graph.py:1779–1807 · view source on GitHub ↗

(partition_book, rank, part_eles)

Source from the content-addressed store, hash-verified

1777
1778
def _split_random_within_part(partition_book, rank, part_eles):
    """Pick this trainer's random, disjoint share of a partition's elements.

    If there is more than one client (trainer) in a partition, each client
    gets a randomly selected subset of the partition's elements, and the
    subsets of different clients are disjoint. Every client permutes the
    positions ``0 .. len(part_eles) - 1`` with the same seed (the partition
    ID) and keeps a different slice of that permutation.

    Parameters
    ----------
    partition_book : object
        Partition book; only ``num_partitions()`` and ``partid`` are used.
    rank : int or None
        Rank of the trainer. If None, ``role.get_trainer_rank()`` is used.
    part_eles : tensor
        Elements of the partition; must support ``len()`` and
        ``F.gather_row``.

    Returns
    -------
    tensor
        ``part_eles`` unchanged when there is exactly one client per
        partition; otherwise the rows of ``part_eles`` at the positions
        selected for this client, in ascending position order.
    """
    num_clients = role.get_num_trainers()
    num_client_per_part = num_clients // partition_book.num_partitions()
    if num_client_per_part == 1:
        # A single client owns the whole partition; nothing to split.
        return part_eles
    if rank is None:
        rank = role.get_trainer_rank()
    assert (
        rank < num_clients
    ), "The input rank ({}) is incorrect. #Trainers: {}".format(
        rank, num_clients
    )
    client_id_in_part = rank % num_client_per_part
    offset = _even_offset(len(part_eles), num_client_per_part)

    # The generator is seeded with the partition ID, so that each process
    # (client) in a partition permutes the elements in the same way and the
    # per-client slices of the permutation are disjoint.
    # A private RandomState is used instead of np.random.seed(): it yields the
    # same permutation for the same seed but does not reseed NumPy's global
    # generator, which other code in the process may rely on.
    rng = np.random.RandomState(partition_book.partid)
    rand_idx = rng.permutation(len(part_eles))
    rand_idx = rand_idx[
        offset[client_id_in_part] : offset[client_id_in_part + 1]
    ]
    # Sort the selected positions so the gathered rows keep their original
    # relative order.
    idx, _ = F.sort_1d(F.tensor(rand_idx))
    return F.gather_row(part_eles, idx)
1808
1809
1810	def _split_by_trainer_id(

node_splitFunction · 0.85

edge_splitFunction · 0.85

_even_offsetFunction · 0.85

formatMethod · 0.80

num_partitionsMethod · 0.45

no test coverage detected