(documents, document_embeddings, embedding_model)
| 159 | |
@pytest.fixture(scope="session")
def online_topic_model(documents, document_embeddings, embedding_model):
    """Build a session-scoped BERTopic model trained through ``partial_fit``.

    The model is configured with a 5-component ``PCA`` passed as
    ``umap_model``, a 50-cluster ``MiniBatchKMeans`` (``random_state=0``)
    passed as ``hdbscan_model``, and an ``OnlineCountVectorizer`` with
    English stop words and ``decay=0.01``.

    Documents and their embeddings are fed to ``partial_fit`` in consecutive
    batches of 50, sliced with the same offsets so they stay aligned.

    Args:
        documents: Sliceable collection of documents to train on.
        document_embeddings: Embeddings sliced with the same offsets as
            ``documents``.
        embedding_model: Embedding backend handed to ``BERTopic``.

    Returns:
        The trained ``BERTopic`` instance, with ``topics_`` replaced by the
        concatenation of the ``topics_`` values observed after each batch.
    """
    batch_size = 50

    model = BERTopic(
        umap_model=PCA(n_components=5),
        hdbscan_model=MiniBatchKMeans(n_clusters=50, random_state=0),
        vectorizer_model=OnlineCountVectorizer(stop_words="english", decay=0.01),
        embedding_model=embedding_model,
    )

    # ``topics_`` is read after every ``partial_fit`` call and accumulated
    # here, so the final attribute covers all batches rather than one.
    collected_topics = []
    for offset in range(0, len(documents), batch_size):
        batch = slice(offset, offset + batch_size)
        model.partial_fit(documents[batch], document_embeddings[batch])
        collected_topics += model.topics_

    model.topics_ = collected_topics
    return model
| 178 | |
| 179 | |
| 180 | @pytest.fixture(scope="session") |
nothing calls this directly
no test coverage detected