(
self,
X,
handle_unknown="error",
ensure_all_finite=True,
return_counts=False,
return_and_ignore_missing_for_infrequent=False,
)
| 75 | return X_columns, n_samples, n_features |
| 76 | |
| 77 | def _fit( |
| 78 | self, |
| 79 | X, |
| 80 | handle_unknown="error", |
| 81 | ensure_all_finite=True, |
| 82 | return_counts=False, |
| 83 | return_and_ignore_missing_for_infrequent=False, |
| 84 | ): |
| 85 | self._check_infrequent_enabled() |
| 86 | validate_data(self, X=X, reset=True, skip_check_array=True) |
| 87 | X_list, n_samples, n_features = self._check_X( |
| 88 | X, ensure_all_finite=ensure_all_finite |
| 89 | ) |
| 90 | self.n_features_in_ = n_features |
| 91 | |
| 92 | if self.categories != "auto": |
| 93 | if len(self.categories) != n_features: |
| 94 | raise ValueError( |
| 95 | "Shape mismatch: if categories is an array," |
| 96 | " it has to be of shape (n_features,)." |
| 97 | ) |
| 98 | |
| 99 | self.categories_ = [] |
| 100 | category_counts = [] |
| 101 | compute_counts = return_counts or self._infrequent_enabled |
| 102 | |
| 103 | for i in range(n_features): |
| 104 | Xi = X_list[i] |
| 105 | |
| 106 | if self.categories == "auto": |
| 107 | result = _unique(Xi, return_counts=compute_counts) |
| 108 | if compute_counts: |
| 109 | cats, counts = result |
| 110 | category_counts.append(counts) |
| 111 | else: |
| 112 | cats = result |
| 113 | else: |
| 114 | if np.issubdtype(Xi.dtype, np.str_): |
| 115 | # Always convert string categories to objects to avoid |
| 116 | # unexpected string truncation for longer category labels |
| 117 | # passed in the constructor. |
| 118 | Xi_dtype = object |
| 119 | else: |
| 120 | Xi_dtype = Xi.dtype |
| 121 | |
| 122 | cats = np.array(self.categories[i], dtype=Xi_dtype) |
| 123 | if ( |
| 124 | cats.dtype == object |
| 125 | and isinstance(cats[0], bytes) |
| 126 | and Xi.dtype.kind != "S" |
| 127 | ): |
| 128 | msg = ( |
| 129 | f"In column {i}, the predefined categories have type 'bytes'" |
| 130 | " which is incompatible with values of type" |
| 131 | f" '{type(Xi[0]).__name__}'." |
| 132 | ) |
| 133 | raise ValueError(msg) |
| 134 |
no test coverage detected