Test that balance_probabilities=True works correctly.
()
| 691 | |
@pytest.mark.skip(reason="This test is flaky and needs to be fixed.")
def test_balanced_probabilities() -> None:
    """Check that balance_probabilities=True yields more uniform mean probabilities.

    Two classifiers that differ only in ``balance_probabilities`` are fitted on
    the same imbalanced three-class dataset and asked for probabilities on that
    same data. The per-class mean of the balanced predictions must have a
    strictly smaller standard deviation than that of the unbalanced ones.
    """
    n_classes = 3
    n_features = 3

    # Imbalanced dataset: class weights are 0.8 / 0.1 / 0.1.
    X, y = sklearn.datasets.make_classification(
        n_samples=60,
        n_classes=n_classes,
        n_features=n_features,
        n_informative=n_features,
        n_redundant=0,
        weights=[0.8, 0.1, 0.1],
        random_state=42,
    )

    def _predict_proba(balance: bool):
        """Fit a fresh two-estimator classifier on (X, y) and predict on X."""
        clf = TabPFNClassifier(
            balance_probabilities=balance,
            random_state=42,
            n_estimators=2,
        )
        clf.fit(X, y)
        return clf.predict_proba(X)

    # Keep the original order: unbalanced model first, balanced model second.
    proba_unbalanced = _predict_proba(False)
    proba_balanced = _predict_proba(True)

    # Spread of the per-class mean probability; a smaller spread is closer
    # to a uniform distribution over the classes.
    unbalanced_deviation = np.std(proba_unbalanced.mean(axis=0))
    balanced_deviation = np.std(proba_balanced.mean(axis=0))

    assert balanced_deviation < unbalanced_deviation, (
        "Balancing did not make probabilities more uniform"
    )
| 734 | |
| 735 | |
| 736 | def test_classifier_in_pipeline(X_y: tuple[np.ndarray, np.ndarray]) -> None: |
nothing calls this directly
no test coverage detected