process raw data to graph, labels and masks
(self)
| 136 | ) |
| 137 | |
def process(self):
    """Process raw data to graph, labels and masks.

    Files read from ``self.raw_path``:

    - ``graph_labels.npy``: labels, one entry per graph.
    - ``node_graph_id.npy``: the graph id of every node.
    - ``A.txt``: comma-separated integer edge list; column 0 is the source
      node and column 1 is the destination node.
    - ``train_idx.npy``, ``val_idx.npy``, ``test_idx.npy``: indices of the
      graphs that belong to each split.
    - ``new_<self.feature_name>_feature.npz``: feature matrix loaded with
      ``sp.load_npz`` and converted to dense form.

    Attributes set: ``labels``, ``graphs``, ``train_mask``, ``val_mask``,
    ``test_mask`` and ``feature``.
    """
    raw_dir = self.raw_path

    self.labels = F.tensor(np.load(os.path.join(raw_dir, "graph_labels.npy")))
    num_graphs = self.labels.shape[0]

    node_graph_id = np.load(os.path.join(raw_dir, "node_graph_id.npy"))

    # genfromtxt returns a 1-D array when the file holds a single edge;
    # atleast_2d keeps the column indexing below valid in that case.
    edges = np.atleast_2d(
        np.genfromtxt(
            os.path.join(raw_dir, "A.txt"), delimiter=",", dtype=int
        )
    )
    src = edges[:, 0]
    dst = edges[:, 1]
    g = graph((src, dst))

    # Group node indices by graph id with a single stable sort instead of
    # scanning node_graph_id once per graph. The stable sort keeps the node
    # indices of each graph in ascending order, and ids in
    # [0, max_id] that own no node still produce an empty index array.
    flat_graph_id = np.ravel(node_graph_id)
    num_groups = np.max(node_graph_id) + 1
    order = np.argsort(flat_graph_id, kind="stable")
    bounds = np.searchsorted(
        flat_graph_id[order], np.arange(num_groups + 1), side="left"
    )
    node_idx_list = [
        order[start:stop] for start, stop in zip(bounds[:-1], bounds[1:])
    ]

    self.graphs = [g.subgraph(node_idx) for node_idx in node_idx_list]

    # Turn each split's index file into a boolean mask over all graphs.
    for split in ("train", "val", "test"):
        split_idx = np.load(os.path.join(raw_dir, split + "_idx.npy"))
        mask = np.zeros(num_graphs, dtype=np.bool_)
        mask[split_idx] = True
        setattr(self, split + "_mask", F.tensor(mask))

    feature_file = "new_" + self.feature_name + "_feature.npz"
    self.feature = F.tensor(
        sp.load_npz(os.path.join(raw_dir, feature_file)).todense()
    )
| 180 | def save(self): |
| 181 | """save the graph list and the labels""" |