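Loads input data from the data directory and reorders the graph for better locality. ind.name.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; ind.name.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; ind.name.allx => the feature vectors of both labeled and unlabeled training instances (a superset of ind.name.x) as scipy.sparse.csr.csr_matrix object; …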
(self)
| 107 | ) |
| 108 | |
| 109 | def process(self): |
| 110 | """Loads input data from data directory and reorder graph for better locality |
| 111 | |
| 112 | ind.name.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; |
| 113 | ind.name.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; |
| 114 | ind.name.allx => the feature vectors of both labeled and unlabeled training instances |
| 115 | (a superset of ind.name.x) as scipy.sparse.csr.csr_matrix object; |
| 116 | ind.name.y => the one-hot labels of the labeled training instances as numpy.ndarray object; |
| 117 | ind.name.ty => the one-hot labels of the test instances as numpy.ndarray object; |
| 118 | ind.name.ally => the labels for instances in ind.name.allx as numpy.ndarray object; |
| 119 | ind.name.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict |
| 120 | object; |
| 121 | ind.name.test.index => the indices of test instances in graph, for the inductive setting as list object. |
| 122 | """ |
| 123 | root = self.raw_path |
| 124 | objnames = ["x", "y", "tx", "ty", "allx", "ally", "graph"] |
| 125 | objects = [] |
| 126 | for i in range(len(objnames)): |
| 127 | with open( |
| 128 | "{}/ind.{}.{}".format(root, self.name, objnames[i]), "rb" |
| 129 | ) as f: |
| 130 | objects.append(_pickle_load(f)) |
| 131 | |
| 132 | x, y, tx, ty, allx, ally, graph = tuple(objects) |
| 133 | test_idx_reorder = _parse_index_file( |
| 134 | "{}/ind.{}.test.index".format(root, self.name) |
| 135 | ) |
| 136 | test_idx_range = np.sort(test_idx_reorder) |
| 137 | |
| 138 | if self.name == "citeseer": |
| 139 | # Fix citeseer dataset (there are some isolated nodes in the graph) |
| 140 | # Find isolated nodes, add them as zero-vecs into the right position |
| 141 | test_idx_range_full = range( |
| 142 | min(test_idx_reorder), max(test_idx_reorder) + 1 |
| 143 | ) |
| 144 | tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) |
| 145 | tx_extended[test_idx_range - min(test_idx_range), :] = tx |
| 146 | tx = tx_extended |
| 147 | ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) |
| 148 | ty_extended[test_idx_range - min(test_idx_range), :] = ty |
| 149 | ty = ty_extended |
| 150 | |
| 151 | features = sp.vstack((allx, tx)).tolil() |
| 152 | features[test_idx_reorder, :] = features[test_idx_range, :] |
| 153 | |
| 154 | if self.reverse_edge: |
| 155 | graph = nx.DiGraph(nx.from_dict_of_lists(graph)) |
| 156 | g = from_networkx(graph) |
| 157 | else: |
| 158 | graph = nx.Graph(nx.from_dict_of_lists(graph)) |
| 159 | edges = list(graph.edges()) |
| 160 | u, v = map(list, zip(*edges)) |
| 161 | g = dgl_graph((u, v)) |
| 162 | |
| 163 | onehot_labels = np.vstack((ally, ty)) |
| 164 | onehot_labels[test_idx_reorder, :] = onehot_labels[test_idx_range, :] |
| 165 | labels = np.argmax(onehot_labels, 1) |
| 166 |
nothing calls this directly
no test coverage detected