MCPcopy
hub / github.com/dmlc/dgl / process

Method process

python/dgl/data/lrgb.py:191–235  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

189 raise ValueError("Unexpected MD5 hash of the split file")
190
def process(self):
    """Build the graphs and normalized regression labels from the raw CSV.

    Reads the CSV at ``self.raw_data_path``, standardizes the eleven
    target columns to zero mean and unit standard deviation, converts
    every entry of the ``"smiles"`` column into a graph through
    ``self.smiles2graph``, and stores the results on the instance:

    * each graph is appended to ``self.graphs`` with int64 ``"feat"``
      node and edge features;
    * each label row is appended to ``self.labels``, which is finally
      converted into a single float32 tensor.
    """
    data_df = pd.read_csv(self.raw_data_path)
    smiles_list = data_df["smiles"]
    target_names = [
        "Inertia_mass_a",
        "Inertia_mass_b",
        "Inertia_mass_c",
        "Inertia_valence_a",
        "Inertia_valence_b",
        "Inertia_valence_c",
        "length_a",
        "length_b",
        "length_c",
        "Spherocity",
        "Plane_best_fit",
    ]
    # Normalize to zero mean and unit standard deviation (per column).
    targets = data_df[target_names]
    normalized = (targets - targets.mean()) / targets.std()
    # Pull every label row out once as a plain list of floats. Doing the
    # lookup per row inside the loop builds two pandas Series per molecule
    # and leaves Series objects in ``self.labels`` for the tensor
    # constructor to unpack.
    label_rows = normalized.to_numpy().tolist()
    if self.verbose:
        print("Converting SMILES strings into graphs...")

    # ``zip`` has no length, so the total is passed explicitly to keep the
    # progress bar's percentage and ETA.
    for smiles, y in tqdm(
        zip(smiles_list, label_rows), total=len(smiles_list)
    ):
        graph = self.smiles2graph(smiles)

        # Internal sanity checks on the converter output: one feature row
        # per edge and one per node.
        assert len(graph["edge_feat"]) == graph["edge_index"].shape[1]
        assert len(graph["node_feat"]) == graph["num_nodes"]
        DGLgraph = dgl_graph(
            (graph["edge_index"][0], graph["edge_index"][1]),
            num_nodes=graph["num_nodes"],
        )
        DGLgraph.edata["feat"] = F.zerocopy_from_numpy(
            graph["edge_feat"]
        ).to(F.int64)
        DGLgraph.ndata["feat"] = F.zerocopy_from_numpy(
            graph["node_feat"]
        ).to(F.int64)

        self.graphs.append(DGLgraph)
        self.labels.append(y)

    self.labels = F.tensor(self.labels, dtype=F.float32)
236
237 def load(self):
238 self.graphs, label_dict = load_graphs(self.graph_path)

Callers

nothing calls this directly

Calls 4

read_csvMethod · 0.80
stdMethod · 0.80
appendMethod · 0.80
toMethod · 0.45

Tested by

no test coverage detected