MCPcopy
hub / github.com/MaartenGr/BERTopic / test_select_topic_representation

Function test_select_topic_representation

tests/test_utils.py:57–94  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

55
56
def test_select_topic_representation():
    """Exercise `select_topic_representation` with dense and sparse inputs.

    The dense cases check both items of the returned pair: the selected
    representation and the flag saying whether the c-TF-IDF matrix was the
    one selected. The sparse cases only check the returned representation.
    """
    dense_ctfidf = np.array([[1, 1, 1]])
    dense_topics = np.array([[2, 2, 2]])

    # Sparse copy of the c-TF-IDF matrix, built from explicit (row, column) coordinates.
    row_idx, col_idx = [0, 0, 0], [0, 1, 2]
    sparse_ctfidf = csr_matrix(
        (dense_ctfidf.ravel().tolist(), (row_idx, col_idx)),
        shape=dense_ctfidf.shape,
    )

    # Columns: c-TF-IDF argument, topic-embedding argument, `use_ctfidf`,
    # expected representation, expected truthiness of the returned flag.
    dense_cases = [
        # Use topic embeddings
        (dense_ctfidf, dense_topics, False, dense_topics, False),
        # Fallback to c-TF-IDF
        (dense_ctfidf, None, False, dense_ctfidf, True),
        # Use c-TF-IDF
        (dense_ctfidf, dense_topics, True, dense_ctfidf, True),
        # Fallback to topic embeddings
        (None, dense_topics, True, dense_topics, False),
    ]
    for ctfidf_arg, topics_arg, prefer_ctfidf, expected_repr, expect_ctfidf in dense_cases:
        chosen, used_ctfidf = select_topic_representation(ctfidf_arg, topics_arg, use_ctfidf=prefer_ctfidf)
        np.testing.assert_array_equal(expected_repr, chosen)
        assert bool(used_ctfidf) is expect_ctfidf

    # `scipy.sparse.csr_matrix` can be used as c-TF-IDF embeddings
    densified, _ = select_topic_representation(sparse_ctfidf, None, use_ctfidf=True, output_ndarray=True)
    np.testing.assert_array_equal(dense_ctfidf, densified)

    # Check that a `csr_matrix` is not cast to `np.ndarray` when `output_ndarray` is False
    kept_sparse, _ = select_topic_representation(sparse_ctfidf, None, output_ndarray=False)
    assert isinstance(kept_sparse, csr_matrix)

Callers

nothing calls this directly

Calls 1

Tested by

no test coverage detected