(documents, document_embeddings)
| 134 | |
@pytest.fixture(scope="session")
def kmeans_pca_topic_model(documents, document_embeddings, embedding_model):
    """Fit a BERTopic model that uses KMeans and PCA as its sub-models.

    ``KMeans`` (15 clusters, ``random_state=42``) is passed in BERTopic's
    ``hdbscan_model`` slot and ``PCA`` (5 components) in its ``umap_model``
    slot.

    Args:
        documents: Value of the ``documents`` fixture; first argument to
            ``fit``.
        document_embeddings: Value of the ``document_embeddings`` fixture;
            second argument to ``fit``.
        embedding_model: Passed to ``BERTopic`` as ``embedding_model``.
            It is requested as a fixture argument because pytest only
            injects fixtures named in the signature; a bare reference in
            the body would resolve to a module-level name instead.
            NOTE(review): assumes an ``embedding_model`` fixture is defined
            elsewhere in this test module -- confirm.

    Returns:
        The result of ``BERTopic(...).fit(documents, document_embeddings)``.
    """
    # These are not HDBSCAN/UMAP instances; they are only passed through the
    # keyword arguments BERTopic names ``hdbscan_model`` and ``umap_model``.
    cluster_model = KMeans(n_clusters=15, random_state=42)
    dim_model = PCA(n_components=5)

    topic_model = BERTopic(
        hdbscan_model=cluster_model,
        umap_model=dim_model,
        embedding_model=embedding_model,
    )
    return topic_model.fit(documents, document_embeddings)
| 145 | |
| 146 | |
| 147 | @pytest.fixture(scope="session") |