MCPcopy
hub / github.com/MaartenGr/BERTopic / online_topic_model

Function online_topic_model

tests/conftest.py:161–177  ·  view source on GitHub ↗
(documents, document_embeddings, embedding_model)

Source from the content-addressed store, hash-verified

159
@pytest.fixture(scope="session")
def online_topic_model(documents, document_embeddings, embedding_model):
    """Build a BERTopic model trained incrementally with ``partial_fit``.

    The model is assembled from a 5-component ``PCA`` reducer, a
    ``MiniBatchKMeans`` clusterer (50 clusters, ``random_state=0``) and an
    ``OnlineCountVectorizer`` (English stop words, ``decay=0.01``), then fed
    the corpus in consecutive slices of 50 documents.

    Args:
        documents: Sequence of documents; sliced in step with the embeddings.
        document_embeddings: Embeddings sliced with the same indices as
            ``documents``.
        embedding_model: Passed through to ``BERTopic`` unchanged.

    Returns:
        The fitted ``BERTopic`` instance, with ``topics_`` replaced by the
        concatenation of the ``topics_`` values observed after each batch.
    """
    batch_size = 50

    # Sub-models are constructed inline, in the same order as the keyword
    # arguments are listed (reducer, clusterer, vectorizer).
    topic_model = BERTopic(
        umap_model=PCA(n_components=5),
        hdbscan_model=MiniBatchKMeans(n_clusters=50, random_state=0),
        vectorizer_model=OnlineCountVectorizer(stop_words="english", decay=0.01),
        embedding_model=embedding_model,
    )

    # NOTE(review): presumably ``topics_`` only reflects the most recent batch
    # after each ``partial_fit`` call, which is why the per-batch values are
    # accumulated and written back at the end -- confirm against BERTopic docs.
    accumulated_topics = []
    for start in range(0, len(documents), batch_size):
        stop = start + batch_size
        topic_model.partial_fit(documents[start:stop], document_embeddings[start:stop])
        accumulated_topics.extend(topic_model.topics_)

    topic_model.topics_ = accumulated_topics
    return topic_model
178
179
180@pytest.fixture(scope="session")

Callers

nothing calls this directly

Calls 3

partial_fitMethod · 0.95
BERTopicClass · 0.90

Tested by

no test coverage detected