Principal component analysis (PCA).
| 851 | |
| 852 | |
| 853 | class _PCA: |
| 854 | """Principal component analysis (PCA).""" |
| 855 | |
| 856 | # Adapted from sklearn and stripped down to just use linalg.svd |
| 857 | # and make it easier to later provide a "center" option if we want |
| 858 | |
| 859 | def __init__(self, n_components=None, whiten=False): |
| 860 | self.n_components = n_components |
| 861 | self.whiten = whiten |
| 862 | |
def fit_transform(self, X, y=None):
    """Fit the model on ``X`` and return ``X`` projected on the components.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input data. A copy is decomposed, so ``X`` itself is not modified.
    y : None
        Ignored.

    Returns
    -------
    X_new : array
        The first ``n_components_`` columns of ``U`` as returned by
        ``_fit``, scaled by ``sqrt(n_samples - 1)`` when ``whiten`` is set
        and by the matching singular values otherwise.
    """
    # _fit centers its argument in place, hence the private copy.
    data = X.copy()
    left_vecs, sing_vals, _ = self._fit(data)

    # NOTE(review): n_components_ is presumably set by _fit -- confirm.
    n_keep = self.n_components_
    scores = left_vecs[:, :n_keep]

    if not self.whiten:
        # X_new = X * V = U * S * V^T * V = U * S
        scores *= sing_vals[:n_keep]
    else:
        # X_new = X * V / S * sqrt(n_samples - 1) = U * sqrt(n_samples - 1)
        scores *= sqrt(data.shape[0] - 1)

    return scores
| 876 | |
def fit(self, X):
    """Fit the PCA model to ``X``.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Data to decompose. It is left unmodified.

    Returns
    -------
    self : instance of _PCA
        The fitted estimator, so that calls can be chained.
    """
    # _fit centers its argument in place (``X -= self.mean_``), so hand it
    # a copy -- as fit_transform does -- to avoid clobbering the caller's
    # data.
    self._fit(X.copy())
    return self
| 879 | |
| 880 | def _fit(self, X): |
| 881 | if self.n_components is None: |
| 882 | n_components = min(X.shape) |
| 883 | else: |
| 884 | n_components = self.n_components |
| 885 | n_samples, n_features = X.shape |
| 886 | |
| 887 | if n_components == "mle": |
| 888 | if n_samples < n_features: |
| 889 | raise ValueError( |
| 890 | "n_components='mle' is only supported if n_samples >= n_features" |
| 891 | ) |
| 892 | elif not 0 <= n_components <= min(n_samples, n_features): |
| 893 | raise ValueError( |
| 894 | f"n_components={repr(n_components)} must be between 0 and " |
| 895 | f"min(n_samples, n_features)={repr(min(n_samples, n_features))} with " |
| 896 | "svd_solver='full'" |
| 897 | ) |
| 898 | elif n_components >= 1: |
| 899 | if not isinstance(n_components, numbers.Integral | np.integer): |
| 900 | raise ValueError( |
| 901 | f"n_components={repr(n_components)} must be of type int " |
| 902 | f"when greater than or equal to 1, " |
| 903 | f"was of type={repr(type(n_components))}" |
| 904 | ) |
| 905 | |
| 906 | self.mean_ = np.mean(X, axis=0) |
| 907 | X -= self.mean_ |
| 908 | |
| 909 | U, S, V = _safe_svd(X, full_matrices=False) |
| 910 | # flip eigenvectors' sign to enforce deterministic output |