MCPcopy
hub / github.com/ddbourgin/numpy-ml / _grow

Method _grow

numpy_ml/trees/dt.py:122–151  ·  view source on GitHub ↗
(self, X, Y, cur_depth=0)

Source from the content-addressed store, hash-verified

120 return np.array([self._traverse(x, self.root, prob=True) for x in X])
121
def _grow(self, X, Y, cur_depth=0):
    """
    Recursively grow the (sub)tree for the examples in `X` and `Y`.

    Parameters
    ----------
    X : 2D numpy array
        Examples to partition. Rows are examples and columns are features
        (the array is unpacked via ``X.shape`` and sliced as
        ``X[:, feat]``).
    Y : 1D numpy array
        Targets aligned with the rows of `X`. When ``self.classifier`` is
        truthy the entries are used as indices into a vector of length
        ``self.n_classes`` and passed to ``np.bincount``, so they must be
        non-negative integers.
    cur_depth : int
        Depth of the node being built. Default is 0 (the root).

    Returns
    -------
    Leaf or Node
        A ``Leaf`` when the targets are all identical, when `cur_depth`
        has reached ``self.max_depth``, or when the selected split would
        leave one side empty. Otherwise a ``Node`` holding the two grown
        children and the ``(feat, thresh)`` split rule.
    """

    def leaf_value(targets):
        # class frequencies for classification, mean target for regression
        if self.classifier:
            counts = np.bincount(targets, minlength=self.n_classes)
            return counts / len(targets)
        return np.mean(targets, axis=0)

    # if all labels are the same, return a leaf
    if len(set(Y)) == 1:
        if not self.classifier:
            return Leaf(Y[0])
        prob = np.zeros(self.n_classes)
        prob[Y[0]] = 1.0
        return Leaf(prob)

    # if we have reached max_depth, return a leaf
    if cur_depth >= self.max_depth:
        return Leaf(leaf_value(Y))

    # sample the candidate features considered for this split
    _, n_cols = X.shape
    feat_idxs = np.random.choice(n_cols, self.n_feats, replace=False)

    # greedily select the best split according to `criterion`
    feat, thresh = self._segment(X, Y, feat_idxs)
    column = X[:, feat]
    left_idx = np.flatnonzero(column <= thresh)
    right_idx = np.flatnonzero(column > thresh)

    # A split that sends every example to one side separates nothing.
    # Recursing on the empty side would divide by len(Y) == 0 (NaN leaf)
    # or hand `_segment` zero rows, so stop here and summarize instead.
    if left_idx.size == 0 or right_idx.size == 0:
        return Leaf(leaf_value(Y))

    # only count the extra level once we know children will be created
    child_depth = cur_depth + 1
    self.depth = max(self.depth, child_depth)

    # grow the children that result from the split
    left = self._grow(X[left_idx, :], Y[left_idx], child_depth)
    right = self._grow(X[right_idx, :], Y[right_idx], child_depth)
    return Node(left, right, (feat, thresh))
152
153 def _segment(self, X, Y, feat_idxs):
154 """

Callers 1

fitMethod · 0.95

Calls 3

_segmentMethod · 0.95
LeafClass · 0.85
NodeClass · 0.70

Tested by

no test coverage detected