MCPcopy
hub / github.com/ddbourgin/numpy-ml / _segment

Method _segment

numpy_ml/trees/dt.py:153–171  ·  view source on GitHub ↗

Find the optimal split rule (feature index and split threshold) for the data according to `self.criterion`.

(self, X, Y, feat_idxs)

Source from the content-addressed store, hash-verified

151 return Node(left, right, (feat, thresh))
152
def _segment(self, X, Y, feat_idxs):
    """
    Find the optimal split rule (feature index and split threshold) for the
    data, scoring every candidate with `self._impurity_gain`.

    Parameters
    ----------
    X : numpy array
        Feature matrix. It is indexed as ``X[:, i]``, so it must be at
        least 2-D (presumably rows are examples and columns are features --
        confirm against the caller).
    Y : numpy array
        Targets; passed through unchanged to `self._impurity_gain`.
    feat_idxs : iterable of int
        Column indices of `X` to evaluate as split candidates.

    Returns
    -------
    split_idx : int or None
        Index of the feature giving the largest impurity gain.
    split_thresh : scalar or None
        Threshold on that feature giving the largest impurity gain.

    Notes
    -----
    ``(None, None)`` is returned when no candidate could be evaluated, i.e.
    `feat_idxs` is empty or `X` has no rows. Ties are resolved in favour of
    the first feature / threshold encountered because the comparison is a
    strict ``>``.
    """
    best_gain = -np.inf
    split_idx, split_thresh = None, None
    for i in feat_idxs:
        vals = X[:, i]
        levels = np.unique(vals)

        if len(levels) > 1:
            # Integer arrays wrap silently on overflow (e.g. uint8
            # 200 + 255), which would yield bogus midpoints; do the
            # arithmetic in floating point instead.
            if np.issubdtype(levels.dtype, np.integer):
                levels = levels.astype(np.float64)
            # Candidate thresholds are midpoints between adjacent levels.
            thresholds = (levels[:-1] + levels[1:]) / 2
        else:
            # A constant column has only its single value as a candidate.
            thresholds = levels

        # No rows -> no candidates; `max`/`argmax` on an empty array raise.
        if len(thresholds) == 0:
            continue

        gains = np.array([self._impurity_gain(Y, t, vals) for t in thresholds])

        best = gains.argmax()
        if gains[best] > best_gain:
            split_idx = i
            best_gain = gains[best]
            split_thresh = thresholds[best]

    return split_idx, split_thresh
172
173 def _impurity_gain(self, Y, split_thresh, feat_values):
174 """

Callers 1

_grow (Method) · 0.95

Calls 1

_impurity_gain (Method) · 0.95

Tested by

no test coverage detected