| 35 | |
| 36 | |
| 37 | class Struc2Vec(): |
def __init__(self, graph, walk_length=10, num_walks=100, workers=1, verbose=0, stay_prob=0.3, opt1_reduce_len=True,
             opt2_reduce_sim_calc=True, opt3_num_layers=None, temp_path='./temp_struc2vec/', reuse=False):
    """Build the context graph for ``graph`` and pre-generate the walk corpus.

    Construction does all the heavy lifting: it indexes the nodes, prepares
    the temporary directory, calls ``create_context_graph`` and
    ``prepare_biased_walk``, and finally stores the walks returned by
    ``BiasedWalker.simulate_walks`` in ``self.sentences``.

    :param graph: input graph; passed unchanged to ``preprocess_nxgraph``
        (presumably a networkx graph -- confirm against that helper).
    :param walk_length: forwarded to ``BiasedWalker.simulate_walks``.
    :param num_walks: forwarded to ``BiasedWalker.simulate_walks``.
    :param workers: forwarded to ``create_context_graph`` and
        ``BiasedWalker.simulate_walks``.
    :param verbose: forwarded to the same two calls as ``workers``.
    :param stay_prob: forwarded to ``BiasedWalker.simulate_walks``.
    :param opt1_reduce_len: stored on the instance; not read in this method.
    :param opt2_reduce_sim_calc: stored on the instance; not read in this
        method.
    :param opt3_num_layers: stored on the instance and passed to
        ``create_context_graph`` as ``max_num_layers``.
    :param temp_path: directory for intermediate pickle files. It is stored
        with a guaranteed trailing path separator because other methods build
        file names as ``self.temp_path + '<name>.pkl'``.
    :param reuse: when false, an existing ``temp_path`` directory is deleted
        and recreated empty; when true, its contents are left in place.
    """
    self.graph = graph
    self.idx2node, self.node2idx = preprocess_nxgraph(graph)
    self.idx = list(range(len(self.idx2node)))

    self.opt1_reduce_len = opt1_reduce_len
    self.opt2_reduce_sim_calc = opt2_reduce_sim_calc
    self.opt3_num_layers = opt3_num_layers

    self.reuse = reuse
    # Historical misspelling of ``reuse``; kept as an alias so that any
    # code still reading ``self.resue`` keeps working.
    self.resue = reuse

    # Joining with '' appends a separator only when one is missing, so the
    # ``self.temp_path + 'file.pkl'`` concatenations used elsewhere always
    # land inside the directory rather than next to it.
    self.temp_path = os.path.join(temp_path, '')

    if os.path.exists(self.temp_path):
        if not reuse:
            # Start from a clean directory so stale intermediates are not
            # picked up.
            shutil.rmtree(self.temp_path)
            os.makedirs(self.temp_path)
    else:
        # makedirs (unlike mkdir) also creates missing parent directories.
        os.makedirs(self.temp_path)

    self.create_context_graph(self.opt3_num_layers, workers, verbose)
    self.prepare_biased_walk()
    self.walker = BiasedWalker(self.idx2node, self.temp_path)
    self.sentences = self.walker.simulate_walks(
        num_walks, walk_length, stay_prob, workers, verbose)

    self._embeddings = {}
| 64 | |
def create_context_graph(self, max_num_layers, workers=1, verbose=0, ):
    """Compute the layered context graph and pickle it under ``self.temp_path``.

    Three files are written, in this order: ``layers_adj.pkl``,
    ``layers_alias.pkl`` and ``layers_accept.pkl``. Nothing is returned.

    :param max_num_layers: forwarded to ``_compute_structural_distance``.
    :param workers: forwarded to ``_compute_structural_distance``.
    :param verbose: forwarded to ``_compute_structural_distance``.
    """
    distances = self._compute_structural_distance(
        max_num_layers, workers, verbose)

    # The adjacency structure is persisted before the transition tables are
    # derived from it.
    adjacency, per_layer_distances = self._get_layer_rep(distances)
    pd.to_pickle(adjacency, self.temp_path + 'layers_adj.pkl')

    # NOTE(review): "accept"/"alias" look like alias-method sampling tables;
    # confirm against _get_transition_probs.
    accept_tables, alias_tables = self._get_transition_probs(
        adjacency, per_layer_distances)
    outputs = (
        (alias_tables, 'layers_alias.pkl'),
        (accept_tables, 'layers_accept.pkl'),
    )
    for payload, file_name in outputs:
        pd.to_pickle(payload, self.temp_path + file_name)
| 76 | |
| 77 | def prepare_biased_walk(self, ): |
| 78 | |
| 79 | sum_weights = {} |
| 80 | sum_edges = {} |
| 81 | average_weight = {} |
| 82 | gamma = {} |
| 83 | layer = 0 |
| 84 | while (os.path.exists(self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')): |
| 85 | probs = pd.read_pickle( |
| 86 | self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl') |
| 87 | for v, list_weights in probs.items(): |
| 88 | sum_weights.setdefault(layer, 0) |
| 89 | sum_edges.setdefault(layer, 0) |
| 90 | sum_weights[layer] += sum(list_weights) |
| 91 | sum_edges[layer] += len(list_weights) |
| 92 | |
| 93 | average_weight[layer] = sum_weights[layer] / sum_edges[layer] |
| 94 |
no outgoing calls