load_data(path="../data/cora/", dataset="cora"): Load the citation network dataset (only Cora is supported for now).
| 13 | |
| 14 | |
def load_data(path="../data/cora/", dataset="cora"):
    """Load citation network dataset (cora only for now).

    Reads ``<dataset>.content`` and ``<dataset>.cites`` from ``path`` and
    turns them into the tensors used for training.

    Each row of the ``.content`` file is read as strings: the first column is
    the node ID, the last column is the class label, and every column in
    between is a feature value. Each row of the ``.cites`` file is a pair of
    node IDs describing one edge.

    Args:
        path: Directory that contains the dataset files. A trailing path
            separator is optional.
        dataset: Base name of the ``.content`` / ``.cites`` files.

    Returns:
        A tuple ``(adj, features, labels, idx_train, idx_val, idx_test)``:

        * ``adj`` -- the symmetrized adjacency matrix with self-loops added,
          passed through ``normalize`` and then converted by
          ``sparse_mx_to_torch_sparse_tensor``.
        * ``features`` -- dense ``torch.FloatTensor`` of the feature matrix
          after ``normalize``.
        * ``labels`` -- ``torch.LongTensor`` holding one class index per node.
        * ``idx_train``, ``idx_val``, ``idx_test`` -- ``torch.LongTensor``
          index sets covering the fixed row ranges ``[0, 140)``,
          ``[200, 500)`` and ``[500, 1500)``.
    """
    # Local import keeps this block self-contained regardless of the file's
    # top-level imports.
    import os

    print('Loading {} dataset...'.format(dataset))

    # Join path components instead of concatenating strings so that a
    # directory given without a trailing separator still resolves correctly.
    content_file = os.path.join(path, "{}.content".format(dataset))
    cites_file = os.path.join(path, "{}.cites".format(dataset))

    idx_features_labels = np.genfromtxt(content_file, dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
    # Node IDs in the files are arbitrary integers; map each one to its row
    # position in the .content file so edges can index the matrices directly.
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt(cites_file, dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    num_nodes = labels.shape[0]
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(num_nodes, num_nodes),
                        dtype=np.float32)

    # build symmetric adjacency matrix
    # Wherever the transposed entry is larger, swap it in; the result holds
    # the larger of adj[i, j] and adj[j, i] at both positions.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    # `normalize` is defined elsewhere in this file; the adjacency matrix gets
    # self-loops (identity) added before it is normalized.
    features = normalize(features)
    adj = normalize(adj + sp.eye(adj.shape[0]))

    # Fixed train / validation / test splits, expressed as row indices.
    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    features = torch.FloatTensor(np.array(features.todense()))
    # Convert the one-hot label matrix to a vector of class indices.
    labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    return adj, features, labels, idx_train, idx_val, idx_test
| 54 | |
| 55 | |
| 56 | def normalize(mx): |
no test coverage detected