(self, X, y=None, force_transform=False)
| 3407 | return self._fit(X, y, force_transform=True) |
| 3408 | |
def _fit(self, X, y=None, force_transform=False):
    """Estimate one power-transform lambda per feature, optionally transforming X.

    Parameters
    ----------
    X : array-like
        Input data; it is first passed through ``self._check_input`` with
        ``in_fit=True`` and ``check_positive=True``.
    y : object, default=None
        Not used by this method.
    force_transform : bool, default=False
        When true, each non-skipped column is overwritten with its
        transformed values and, if ``self.standardize`` is set, the result
        of the scaler's ``fit_transform`` is returned.

    Returns
    -------
    X : ndarray
        The validated data. Columns are power-transformed in place when
        ``self.standardize`` or ``force_transform`` is truthy; the scaler
        output is returned only when both are truthy.

    Notes
    -----
    Sets ``self.lambdas_`` and, when ``self.standardize`` is truthy,
    ``self._scaler``.
    """
    X = self._check_input(X, in_fit=True, check_positive=True)

    # Called from fit() with copy=False: work on a private copy so that the
    # in-place column writes below never touch the caller's array.
    if not (self.copy or force_transform):
        X = X.copy()

    n_samples = X.shape[0]
    mean = np.mean(X, axis=0, dtype=np.float64)
    var = np.var(X, axis=0, dtype=np.float64)

    # One lookup yields both the lambda optimizer and the matching transform.
    optim_function, transform_function = {
        "box-cox": (self._box_cox_optimize, boxcox),
        "yeo-johnson": (self._yeo_johnson_optimize, stats.yeojohnson),
    }[self.method]

    is_yeo_johnson = self.method == "yeo-johnson"
    apply_transform = self.standardize or force_transform

    with np.errstate(invalid="ignore"):  # hide NaN warnings
        self.lambdas_ = np.empty(X.shape[1], dtype=X.dtype)
        lambdas = self.lambdas_  # alias of the same array, not a copy
        for idx, column in enumerate(X.T):
            constant = _is_constant_feature(var[idx], mean[idx], n_samples)
            if is_yeo_johnson and constant:
                # lambda=1 is the identity for yeo-johnson, so a constant
                # feature is left exactly as it is.
                lambdas[idx] = 1.0
            else:
                lambdas[idx] = optim_function(column)
                if apply_transform:
                    # Pass the stored lambda (already cast to X.dtype).
                    X[:, idx] = transform_function(X[:, idx], lambdas[idx])

    if not self.standardize:
        return X

    self._scaler = StandardScaler(copy=False).set_output(transform="default")
    if force_transform:
        return self._scaler.fit_transform(X)

    self._scaler.fit(X)
    return X
| 3452 | |
| 3453 | def transform(self, X): |
| 3454 | """Apply the power transform to each feature using the fitted lambdas. |
no test coverage detected