(self, X, y=None)
| 272 | return node |
| 273 | |
def _split(self, X, y=None):
    """
    Partition a set of points into two halves about the median of the
    dimension along which they are most spread out.

    The column of `X` with the largest variance is used as the splitting
    dimension. The rows of `X` (and the matching entries of `y`, when it
    is given) are ordered by their value in that column and cut at index
    ``n // 2``, where ``n`` is the number of rows.

    Parameters
    ----------
    X : array, indexed as ``X[row, column]``
        The points to split, one per row.
    y : array or None
        Optional values aligned with the rows of `X`. Default is None.

    Returns
    -------
    centroid
        The row found at index ``n // 2`` once `X` has been sorted.
    left_X, left_y
        The rows (and their `y` values) sorted before the centroid.
    right_X, right_y
        The centroid row and every row sorted after it (and their `y`
        values). `left_y` and `right_y` are None when `y` is None.
    """
    has_targets = y is not None

    # the splitting axis is the column whose values vary the most
    axis = np.argmax(np.var(X, axis=0))

    # reorder the rows (and their targets) by increasing value on that axis
    order = np.argsort(X[:, axis])
    X = X[order]
    if has_targets:
        y = y[order]

    # the middle row becomes the centroid and opens the right half
    half = X.shape[0] // 2
    centroid = X[half]

    if has_targets:
        left_y, right_y = y[:half], y[half:]
    else:
        left_y = right_y = None

    return centroid, X[:half], left_y, X[half:], right_y
| 291 | |
| 292 | def nearest_neighbors(self, k, x): |
| 293 | """ |
no outgoing calls
no test coverage detected