| 179 | |
@pytest.fixture(scope="session")
def cuml_base_topic_model(documents, document_embeddings, embedding_model):
    """Provide a session-scoped BERTopic model fitted with cuML components.

    The model is built with a cuML UMAP reducer and a cuML HDBSCAN clusterer
    and is fitted on ``documents`` using the supplied ``document_embeddings``.

    Args:
        documents: Fixture value passed as the first argument to ``fit``.
        document_embeddings: Fixture value passed as the second argument to
            ``fit``.
        embedding_model: Fixture value forwarded to ``BERTopic`` as
            ``embedding_model``.

    Returns:
        The ``BERTopic`` instance after ``fit`` has been called on it.
    """
    # Imported inside the fixture, so cuml is only loaded when a test
    # actually requests this fixture.
    from cuml.cluster import HDBSCAN as cuml_hdbscan
    from cuml.manifold import UMAP as cuml_umap

    # Build the estimators in the same order the keyword arguments were
    # evaluated originally: UMAP first, then HDBSCAN.
    reducer = cuml_umap(n_components=5, n_neighbors=5, random_state=42)
    clusterer = cuml_hdbscan(min_cluster_size=3, prediction_data=True)

    topic_model = BERTopic(
        embedding_model=embedding_model,
        calculate_probabilities=True,
        umap_model=reducer,
        hdbscan_model=clusterer,
    )
    topic_model.fit(documents, document_embeddings)
    return topic_model