| 72 | |
@pytest.fixture(scope="session")
def custom_topic_model(documents, document_embeddings, embedding_model):
    """Return a BERTopic model fitted with explicitly configured sub-models.

    The fixture is session-scoped, so the model is built and fitted once per
    test session and shared by every test that requests it.

    Args:
        documents: Documents passed as the first argument to ``fit``
            (supplied by the ``documents`` fixture).
        document_embeddings: Embeddings passed as the second argument to
            ``fit`` (supplied by the ``document_embeddings`` fixture);
            presumably precomputed for ``documents`` -- confirm against
            that fixture.
        embedding_model: Embedding backend handed to ``BERTopic``
            (supplied by the ``embedding_model`` fixture).

    Returns:
        Whatever ``BERTopic(...).fit(documents, document_embeddings)``
        returns.
    """
    # Dimensionality reduction step; a fixed random_state is passed to UMAP.
    reducer = UMAP(
        n_neighbors=15,
        n_components=6,
        min_dist=0.0,
        metric="cosine",
        random_state=42,
    )

    # Clustering step, configured with prediction_data=True.
    clusterer = HDBSCAN(
        min_cluster_size=3,
        metric="euclidean",
        cluster_selection_method="eom",
        prediction_data=True,
    )

    topic_model = BERTopic(
        embedding_model=embedding_model,
        umap_model=reducer,
        hdbscan_model=clusterer,
        calculate_probabilities=True,
    )
    return topic_model.fit(documents, document_embeddings)
| 89 | |
| 90 | |
| 91 | @pytest.fixture(scope="session") |