()
| 55 | |
| 56 | |
def test_select_topic_representation():
    """Exercise `select_topic_representation` with dense and sparse inputs.

    Covers the preferred representation and the fallback for both values of
    `use_ctfidf`, then checks how a `csr_matrix` c-TF-IDF input is returned
    with `output_ndarray=True` and `output_ndarray=False`.
    """
    dense_ctfidf = np.array([[1, 1, 1]])
    dense_topics = np.array([[2, 2, 2]])

    # Sparse twin of `dense_ctfidf`, assembled in COO style: (data, (rows, cols)).
    values = dense_ctfidf.reshape(-1).tolist()
    row_idx, col_idx = [0, 0, 0], [0, 1, 2]
    sparse_ctfidf = csr_matrix((values, (row_idx, col_idx)), shape=dense_ctfidf.shape)

    # Each scenario: (label, c-TF-IDF arg, topic-embedding arg, use_ctfidf,
    #                 expected representation, expected "c-TF-IDF used" flag)
    scenarios = [
        ("use topic embeddings", dense_ctfidf, dense_topics, False, dense_topics, False),
        ("fallback to c-TF-IDF", dense_ctfidf, None, False, dense_ctfidf, True),
        ("use c-TF-IDF", dense_ctfidf, dense_topics, True, dense_ctfidf, True),
        ("fallback to topic embeddings", None, dense_topics, True, dense_topics, False),
    ]
    for label, ctfidf_arg, topics_arg, prefer_ctfidf, want_repr, want_flag in scenarios:
        chosen, used_ctfidf = select_topic_representation(ctfidf_arg, topics_arg, use_ctfidf=prefer_ctfidf)
        np.testing.assert_array_equal(want_repr, chosen)
        assert bool(used_ctfidf) is want_flag, label

    # A `scipy.sparse.csr_matrix` is accepted as c-TF-IDF embeddings; with
    # `output_ndarray=True` the result must compare equal to the dense array.
    densified = select_topic_representation(sparse_ctfidf, None, use_ctfidf=True, output_ndarray=True)[0]
    np.testing.assert_array_equal(dense_ctfidf, densified)

    # With `output_ndarray=False` the `csr_matrix` must not be cast to `np.ndarray`.
    untouched = select_topic_representation(sparse_ctfidf, None, output_ndarray=False)[0]
    assert isinstance(untouched, csr_matrix)
nothing calls this directly
no test coverage detected