# TODO: fix
@pytest.mark.skip()
def test_PCA(dataset):
    """Train a random forest on PCA-reduced features and check held-out ROC AUC.

    The test is currently skipped unconditionally (see the TODO above the
    decorator). ``dataset`` is unpacked as an ``(X, y)`` pair, split 75/25
    with a fixed ``random_state``, and the assertion requires a ROC AUC of
    at least 0.75 on the held-out quarter.
    """
    features, labels = dataset
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.25, random_state=1111
    )

    # Fit the reduction on the training split only, never the full dataset,
    # so the held-out rows do not influence the projection.
    reducer = PCA(50, solver="eigen")
    reducer.fit(train_X)
    train_reduced = reducer.transform(train_X)
    test_reduced = reducer.transform(test_X)

    forest = RandomForestClassifier(n_estimators=25, max_depth=5)
    forest.fit(train_reduced, train_y)

    # Column 1 of the predict() output is what gets scored.
    # NOTE(review): presumably the positive-class probability -- confirm
    # against the classifier's predict() contract.
    raw_output = forest.predict(test_reduced)
    positive_scores = raw_output[:, 1]

    assert roc_auc_score(test_y, positive_scores) >= 0.75