MCPcopy
hub / github.com/ddbourgin/numpy-ml / _split

Method _split

numpy_ml/utils/data_structures.py:274–290  ·  view source on GitHub ↗
(self, X, y=None)

Source from the content-addressed store, hash-verified

272 return node
273
274 def _split(self, X, y=None):
275 # find the dimension with greatest variance
276 split_dim = np.argmax(np.var(X, axis=0))
277
278 # sort X and y along split_dim
279 sort_ixs = np.argsort(X[:, split_dim])
280 X, y = X[sort_ixs], y[sort_ixs] if y is not None else None
281
282 # divide at median value of split_dim
283 med_ix = X.shape[0] // 2
284 centroid = X[med_ix] # , split_dim
285
286 # split data into two halves at the centroid (median always appears on
287 # the right split)
288 left_X, left_y = X[:med_ix], y[:med_ix] if y is not None else None
289 right_X, right_y = X[med_ix:], y[med_ix:] if y is not None else None
290 return centroid, left_X, left_y, right_X, right_y
291
292 def nearest_neighbors(self, k, x):
293 """

Callers 2

fitMethod · 0.95
_build_treeMethod · 0.95

Calls

no outgoing calls

Tested by

no test coverage detected