(documents, document_embeddings, embedding_model)
| 90 | |
@pytest.fixture(scope="session")
def representation_topic_model(documents, document_embeddings, embedding_model):
    """Build a fitted BERTopic model that carries several topic representations.

    The model is assembled from an explicit UMAP reducer, an explicit HDBSCAN
    clusterer and a dictionary of representation models, then fitted on the
    supplied documents together with their embeddings.

    Args:
        documents: Documents passed as the first argument to ``fit``.
        document_embeddings: Embeddings passed as the second argument to
            ``fit`` alongside ``documents``.
        embedding_model: Forwarded to ``BERTopic`` as ``embedding_model``.

    Returns:
        Whatever ``BERTopic(...).fit(documents, document_embeddings)`` returns.
    """
    # Dimensionality reduction with a fixed seed (random_state=42).
    reducer = UMAP(
        n_neighbors=15,
        n_components=6,
        min_dist=0.0,
        metric="cosine",
        random_state=42,
    )

    # Clustering step; prediction_data=True is requested from HDBSCAN.
    clusterer = HDBSCAN(
        min_cluster_size=3,
        metric="euclidean",
        cluster_selection_method="eom",
        prediction_data=True,
    )

    # Two named representations: "Main" uses a single model, "MMR" passes a
    # list of two models (presumably applied in sequence -- confirm against
    # the BERTopic documentation).
    representations = {
        "Main": KeyBERTInspired(),
        "MMR": [KeyBERTInspired(top_n_words=30), MaximalMarginalRelevance()],
    }

    topic_model = BERTopic(
        umap_model=reducer,
        hdbscan_model=clusterer,
        embedding_model=embedding_model,
        representation_model=representations,
        calculate_probabilities=True,
    )
    return topic_model.fit(documents, document_embeddings)
| 112 | |
| 113 | |
| 114 | @pytest.fixture(scope="session") |
nothing calls this directly
no test coverage detected