| 457 | raise ValueError("Unexpected MD5 hash of the split file") |
| 458 | |
def process(self):
    """Build the graph list and label tensor from the raw CSV file.

    Reads the CSV at ``self.raw_data_path``, converts every entry of its
    ``"smiles"`` column into a graph dictionary with ``self.smiles2graph``,
    wraps that dictionary in a DGL graph carrying int64 ``"feat"`` edge and
    node data, and appends the graph to ``self.graphs``. The matching entry
    of the ``"labels"`` column is parsed and appended to ``self.labels``,
    which is replaced by a float32 tensor once all rows are processed.

    NOTE(review): presumably ``self.graphs`` and ``self.labels`` are lists
    initialised before this method runs -- confirm against the constructor.
    """
    frame = pd.read_csv(self.raw_data_path)
    smiles_column = frame["smiles"]
    if self.verbose:
        print("Converting SMILES strings into graphs...")

    row_count = len(smiles_column)
    for row in tqdm(range(row_count)):
        mol = self.smiles2graph(smiles_column[row])

        # Sanity checks: one feature row per edge and one per node.
        edge_feat = mol["edge_feat"]
        edge_index = mol["edge_index"]
        assert len(edge_feat) == edge_index.shape[1]
        node_feat = mol["node_feat"]
        num_nodes = mol["num_nodes"]
        assert len(node_feat) == num_nodes

        # Rows 0 and 1 of edge_index are passed as the two endpoint arrays.
        endpoints = (edge_index[0], edge_index[1])
        mol_graph = dgl_graph(endpoints, num_nodes=num_nodes)

        edge_tensor = F.zerocopy_from_numpy(edge_feat)
        mol_graph.edata["feat"] = edge_tensor.to(F.int64)
        node_tensor = F.zerocopy_from_numpy(node_feat)
        mol_graph.ndata["feat"] = node_tensor.to(F.int64)
        self.graphs.append(mol_graph)

        # NOTE(review): eval() executes whatever expression the CSV cell
        # contains; if the cells are plain Python literals,
        # ast.literal_eval would be a safer parser -- confirm the label
        # format before switching.
        raw_label = frame["labels"].iloc[row]
        self.labels.append(eval(raw_label))

    # Collapse the accumulated per-row labels into a single float32 tensor.
    self.labels = F.tensor(self.labels, dtype=F.float32)
| 484 | |
| 485 | def load(self): |
| 486 | self.graphs, label_dict = load_graphs(self.graph_path) |