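Fit the gradient boosted decision trees on a dataset. Parameters ---------- X : :py:class:`ndarray <numpy.ndarray>` of shape (N, M) The training data of `N` examples, each with `M` features Y : :py:class:`ndarray <numpy.ndarray>` of shape (N,) An array of integer class labels for each example in `X` if ``self.classifier = True``, otherwise the set of target values for each example in `X`.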
(self, X, Y)
| 92 | self.learning_rate = learning_rate |
| 93 | |
| 94 | def fit(self, X, Y): |
| 95 | """ |
| 96 | Fit the gradient boosted decision trees on a dataset. |
| 97 | |
| 98 | Parameters |
| 99 | ---------- |
| 100 | X : :py:class:`ndarray <numpy.ndarray>` of shape (N, M) |
| 101 | The training data of `N` examples, each with `M` features |
| 102 | Y : :py:class:`ndarray <numpy.ndarray>` of shape (N,) |
| 103 | An array of integer class labels for each example in `X` if |
| 104 | ``self.classifier = True``, otherwise the set of target values for |
| 105 | each example in `X`. |
| 106 | """ |
| 107 | if self.loss == "mse": |
| 108 | loss = MSELoss() |
| 109 | elif self.loss == "crossentropy": |
| 110 | loss = CrossEntropyLoss() |
| 111 | |
| 112 | # convert Y to one_hot if not already |
| 113 | if self.classifier: |
| 114 | Y = to_one_hot(Y.flatten()) |
| 115 | else: |
| 116 | Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y |
| 117 | |
| 118 | N, M = X.shape |
| 119 | self.out_dims = Y.shape[1] |
| 120 | self.learners = np.empty((self.n_iter, self.out_dims), dtype=object) |
| 121 | self.weights = np.ones((self.n_iter, self.out_dims)) |
| 122 | self.weights[1:, :] *= self.learning_rate |
| 123 | |
| 124 | # fit the base estimator |
| 125 | Y_pred = np.zeros((N, self.out_dims)) |
| 126 | for k in range(self.out_dims): |
| 127 | t = loss.base_estimator() |
| 128 | t.fit(X, Y[:, k]) |
| 129 | Y_pred[:, k] += t.predict(X) |
| 130 | self.learners[0, k] = t |
| 131 | |
| 132 | # incrementally fit each learner on the negative gradient of the loss |
| 133 | # wrt the previous fit (pseudo-residuals) |
| 134 | for i in range(1, self.n_iter): |
| 135 | for k in range(self.out_dims): |
| 136 | y, y_pred = Y[:, k], Y_pred[:, k] |
| 137 | neg_grad = -1 * loss.grad(y, y_pred) |
| 138 | |
| 139 | # use MSE as the surrogate loss when fitting to negative gradients |
| 140 | t = DecisionTree( |
| 141 | classifier=False, max_depth=self.max_depth, criterion="mse" |
| 142 | ) |
| 143 | |
| 144 | # fit current learner to negative gradients |
| 145 | t.fit(X, neg_grad) |
| 146 | self.learners[i, k] = t |
| 147 | |
| 148 | # compute step size and weight for the current learner |
| 149 | step = 1.0 |
| 150 | h_pred = t.predict(X) |
| 151 | if self.step_size == "adaptive": |