MCPcopy
hub / github.com/dmlc/dgl / _chunk_graph

Function _chunk_graph

tools/chunk_graph.py:30–157  ·  view source on GitHub ↗
(
    g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt
)

Source from the content-addressed store, hash-verified

28
29
30def _chunk_graph(
31 g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt
32):
33 # First deal with ndata and edata that are homogeneous (i.e. not a dict-of-dict)
34 if len(g.ntypes) == 1 and not isinstance(
35 next(iter(ndata_paths.values())), dict
36 ):
37 ndata_paths = {g.ntypes[0]: ndata_paths}
38 if len(g.etypes) == 1 and not isinstance(
39 next(iter(edata_paths.values())), dict
40 ):
41 edata_paths = {g.etypes[0]: ndata_paths}
42 # Then convert all edge types to canonical edge types
43 etypestrs = {etype: ":".join(etype) for etype in g.canonical_etypes}
44 edata_paths = {
45 ":".join(g.to_canonical_etype(k)): v for k, v in edata_paths.items()
46 }
47
48 metadata = {}
49
50 metadata["graph_name"] = name
51 metadata["node_type"] = g.ntypes
52
53 # Compute the number of nodes per chunk per node type
54 metadata["num_nodes_per_chunk"] = num_nodes_per_chunk = []
55 for ntype in g.ntypes:
56 num_nodes = g.num_nodes(ntype)
57 num_nodes_list = []
58 for i in range(num_chunks):
59 n = num_nodes // num_chunks + (i < num_nodes % num_chunks)
60 num_nodes_list.append(n)
61 num_nodes_per_chunk.append(num_nodes_list)
62 num_nodes_per_chunk_dict = {
63 k: v for k, v in zip(g.ntypes, num_nodes_per_chunk)
64 }
65
66 metadata["edge_type"] = [etypestrs[etype] for etype in g.canonical_etypes]
67
68 # Compute the number of edges per chunk per edge type
69 metadata["num_edges_per_chunk"] = num_edges_per_chunk = []
70 for etype in g.canonical_etypes:
71 num_edges = g.num_edges(etype)
72 num_edges_list = []
73 for i in range(num_chunks):
74 n = num_edges // num_chunks + (i < num_edges % num_chunks)
75 num_edges_list.append(n)
76 num_edges_per_chunk.append(num_edges_list)
77 num_edges_per_chunk_dict = {
78 k: v for k, v in zip(g.canonical_etypes, num_edges_per_chunk)
79 }
80
81 # Split edge index
82 metadata["edges"] = {}
83 with setdir("edge_index"):
84 for etype in g.canonical_etypes:
85 etypestr = etypestrs[etype]
86 logging.info("Chunking edge index for %s" % etypestr)
87 edges_meta = {}

Callers 1

chunk_graphFunction · 0.70

Calls 13

setdirFunction · 0.90
chunk_numpy_arrayFunction · 0.85
appendMethod · 0.80
infoMethod · 0.80
dumpMethod · 0.80
valuesMethod · 0.45
joinMethod · 0.45
to_canonical_etypeMethod · 0.45
itemsMethod · 0.45
num_nodesMethod · 0.45
num_edgesMethod · 0.45
edgesMethod · 0.45

Tested by

no test coverage detected