Find the optimal split rule (feature index and split threshold) for the data according to `self.criterion`.
(self, X, Y, feat_idxs)
| 151 | return Node(left, right, (feat, thresh)) |
| 152 | |
def _segment(self, X, Y, feat_idxs):
    """
    Find the optimal split rule (feature index and split threshold) for the
    data according to `self.criterion`.

    Every feature in `feat_idxs` is scanned. The candidate thresholds for a
    feature are the midpoints between its consecutive sorted unique values
    (or the single unique value itself when the feature is constant). Each
    candidate is scored with `self._impurity_gain` and the highest-scoring
    (feature, threshold) pair is kept.

    Parameters
    ----------
    X : array indexable as ``X[:, i]``
        Feature matrix; column `i` holds the values of feature `i`.
    Y : array
        Targets; passed through unchanged to `self._impurity_gain`.
    feat_idxs : iterable of int
        Column indices of `X` to consider as split candidates.

    Returns
    -------
    split_idx : int or None
        Index of the feature with the largest gain, or None if no candidate
        split could be scored (no features given, or no rows in `X`).
    split_thresh : scalar or None
        Threshold associated with `split_idx`, or None if no split was found.
    """
    best_gain = -np.inf
    split_idx, split_thresh = None, None

    for i in feat_idxs:
        vals = X[:, i]
        levels = np.unique(vals)

        if len(levels) > 1:
            # midpoints between adjacent sorted unique values
            thresholds = (levels[:-1] + levels[1:]) / 2
        else:
            thresholds = levels

        # A column with no values yields no candidates; reducing an empty
        # `gains` array with max()/argmax() would raise ValueError, so skip.
        if len(thresholds) == 0:
            continue

        gains = np.array([self._impurity_gain(Y, t, vals) for t in thresholds])
        top = gains.argmax()

        # strict '>' keeps the earliest feature when gains tie
        if gains[top] > best_gain:
            split_idx = i
            best_gain = gains[top]
            split_thresh = thresholds[top]

    return split_idx, split_thresh
| 172 | |
| 173 | def _impurity_gain(self, Y, split_thresh, feat_values): |
| 174 | """ |