Replace any unknown categoricals with empty categoricals. Useful for preventing ``UNKNOWN_CATEGORIES`` from leaking into results.
(x, just_drop_unknown=False)
| 189 | |
| 190 | |
def strip_unknown_categories(x, just_drop_unknown=False):
    """Replace any unknown categoricals with empty categoricals.

    Useful for preventing ``UNKNOWN_CATEGORIES`` from leaking into results.

    Parameters
    ----------
    x : pd.DataFrame, pd.Series, pd.CategoricalIndex, or any other object
        Object to clean. DataFrames and Series are copied before being
        modified; any other object that is not a ``pd.CategoricalIndex``
        is returned unchanged.
    just_drop_unknown : bool, optional
        Only consulted for categorical *columns of a DataFrame*. If True,
        only the ``UNKNOWN_CATEGORIES`` entry is removed from the column's
        categories; if False (default), the categories are replaced by an
        empty list. Series values and categorical indexes always get empty
        categories, regardless of this flag.

    Returns
    -------
    Same type as ``x``
        A cleaned copy for DataFrame/Series input, a new index for a
        ``pd.CategoricalIndex`` with unknown categories, otherwise ``x``
        itself.
    """
    if isinstance(x, (pd.Series, pd.DataFrame)):
        # Work on a copy so the caller's object is never modified.
        x = x.copy()
        if isinstance(x, pd.DataFrame):
            cat_mask = x.dtypes == "category"
            # An all-False mask yields an empty index, so no explicit
            # ``.any()`` guard is needed.
            for c in cat_mask[cat_mask].index:
                if has_known_categories(x[c]):
                    continue
                if just_drop_unknown:
                    # Assign the result back instead of passing
                    # ``inplace=True``: that keyword was removed from the
                    # categorical methods in pandas 2.0, and assigning does
                    # not depend on mutating the Series returned by ``x[c]``.
                    x[c] = x[c].cat.remove_categories(UNKNOWN_CATEGORIES)
                else:
                    x[c] = x[c].cat.set_categories([])
        elif isinstance(x.dtype, pd.CategoricalDtype) and not has_known_categories(x):
            # ``x`` is a Series here (the only other type admitted above).
            x = x.cat.set_categories([])
        # The index is cleaned for both DataFrames and Series.
        if isinstance(x.index, pd.CategoricalIndex) and not has_known_categories(
            x.index
        ):
            x.index = x.index.set_categories([])
    elif isinstance(x, pd.CategoricalIndex) and not has_known_categories(x):
        x = x.set_categories([])
    return x
| 218 | |
| 219 | |
| 220 | def clear_known_categories(x, cols=None, index=True, dtype_backend=None): |
no test coverage detected
searching dependent graphs…