Set or check the `feature_names_in_` attribute of an estimator. .. versionadded:: 1.0 .. versionchanged:: 1.6 Moved from :class:`~sklearn.base.BaseEstimator` to :mod:`sklearn.utils.validation`. .. note:: To only check feature names without conducting a full data validation, prefer using `validate_data(..., skip_check_array=True)` if possible.
(estimator, X, *, reset)
| 2767 | |
| 2768 | |
| 2769 | def _check_feature_names(estimator, X, *, reset): |
| 2770 | """Set or check the `feature_names_in_` attribute of an estimator. |
| 2771 | |
| 2772 | .. versionadded:: 1.0 |
| 2773 | |
| 2774 | .. versionchanged:: 1.6 |
| 2775 | Moved from :class:`~sklearn.base.BaseEstimator` to |
| 2776 | :mod:`sklearn.utils.validation`. |
| 2777 | |
| 2778 | .. note:: |
| 2779 | To only check feature names without conducting a full data validation, prefer |
| 2780 | using `validate_data(..., skip_check_array=True)` if possible. |
| 2781 | |
| 2782 | Parameters |
| 2783 | ---------- |
| 2784 | estimator : estimator instance |
| 2785 | The estimator to validate the input for. |
| 2786 | |
| 2787 | X : {ndarray, dataframe} of shape (n_samples, n_features) |
| 2788 | The input samples. |
| 2789 | |
| 2790 | reset : bool |
| 2791 | Whether to reset the `feature_names_in_` attribute. |
| 2792 | If True, resets the `feature_names_in_` attribute as inferred from `X`. |
| 2793 | If False, the input will be checked for consistency with |
| 2794 | feature names of data provided when reset was last True. |
| 2795 | |
| 2796 | .. note:: |
| 2797 | It is recommended to call `reset=True` in `fit` and in the first |
| 2798 | call to `partial_fit`. All other methods that validate `X` |
| 2799 | should set `reset=False`. |
| 2800 | """ |
| 2801 | |
| 2802 | if reset: |
| 2803 | feature_names_in = _get_feature_names(X) |
| 2804 | if feature_names_in is not None: |
| 2805 | estimator.feature_names_in_ = feature_names_in |
| 2806 | elif hasattr(estimator, "feature_names_in_"): |
| 2807 | # Delete the attribute when the estimator is fitted on a new dataset |
| 2808 | # that has no feature names. |
| 2809 | delattr(estimator, "feature_names_in_") |
| 2810 | return |
| 2811 | |
| 2812 | fitted_feature_names = getattr(estimator, "feature_names_in_", None) |
| 2813 | X_feature_names = _get_feature_names(X) |
| 2814 | |
| 2815 | if fitted_feature_names is None and X_feature_names is None: |
| 2816 | # no feature names seen in fit and in X |
| 2817 | return |
| 2818 | |
| 2819 | if X_feature_names is not None and fitted_feature_names is None: |
| 2820 | warnings.warn( |
| 2821 | f"X has feature names, but {estimator.__class__.__name__} was fitted " |
| 2822 | "without feature names" |
| 2823 | ) |
| 2824 | return |
| 2825 | |
| 2826 | if X_feature_names is None and fitted_feature_names is not None: |
searching dependent graphs…