| 120 | return np.array([self._traverse(x, self.root, prob=True) for x in X]) |
| 121 | |
def _grow(self, X, Y, cur_depth=0):
    """
    Recursively grow the (sub)tree that fits the samples ``X`` / ``Y``.

    Parameters
    ----------
    X : 2-D numpy array
        Training samples for this node; rows are samples, columns are
        features (the method reads ``X.shape`` as ``(N, M)`` and slices
        ``X[:, feat]``).
    Y : 1-D numpy array
        Targets aligned with the rows of ``X``. When ``self.classifier`` is
        truthy the values are used as indices into a length-``n_classes``
        probability vector, so they must be non-negative integers.
    cur_depth : int
        Number of splits already made on the path from the root to this
        node. Default is 0.

    Returns
    -------
    Leaf or Node
        A ``Leaf`` holding class probabilities (classifier) or a target
        value (otherwise), or a ``Node`` holding the two grown children and
        the ``(feature, threshold)`` pair used to split.

    Notes
    -----
    Updates ``self.depth`` whenever a split deeper than any seen so far is
    actually created.
    """

    def leaf_value(targets):
        # Classifier leaves store the empirical class distribution; other
        # leaves store the mean target.
        if self.classifier:
            return np.bincount(targets, minlength=self.n_classes) / len(targets)
        return np.mean(targets, axis=0)

    # If all labels are the same there is nothing left to split on.
    if len(set(Y)) == 1:
        if self.classifier:
            prob = np.zeros(self.n_classes)
            prob[Y[0]] = 1.0
            return Leaf(prob)
        return Leaf(Y[0])

    # If we have reached max_depth, summarize the remaining samples.
    if cur_depth >= self.max_depth:
        return Leaf(leaf_value(Y))

    # Consider a random subset of `n_feats` features for this split.
    n_features = X.shape[1]
    feat_idxs = np.random.choice(n_features, self.n_feats, replace=False)

    # Greedily select the best split according to `criterion`.
    feat, thresh = self._segment(X, Y, feat_idxs)
    column = X[:, feat]
    left_idx = np.flatnonzero(column <= thresh)
    right_idx = np.flatnonzero(column > thresh)

    # A split that sends every sample to one side separates nothing (e.g.
    # identical feature values with differing labels). Recursing on the
    # empty side would produce a NaN leaf or fail further down, so stop
    # here and summarize the node instead.
    if left_idx.size == 0 or right_idx.size == 0:
        return Leaf(leaf_value(Y))

    child_depth = cur_depth + 1
    self.depth = max(self.depth, child_depth)

    # Grow the children that result from the split.
    left = self._grow(X[left_idx, :], Y[left_idx], child_depth)
    right = self._grow(X[right_idx, :], Y[right_idx], child_depth)
    return Node(left, right, (feat, thresh))
| 152 | |
| 153 | def _segment(self, X, Y, feat_idxs): |
| 154 | """ |