Select the topic representation. Arguments: `ctfidf_embeddings`: The c-TF-IDF embedding matrix; `embeddings`: The topic embedding matrix; `use_ctfidf`: Whether to use the c-TF-IDF representation. If False, the topic embedding representation is used, if it exists.
(
ctfidf_embeddings: Optional[Union[np.ndarray, csr_matrix]] = None,
embeddings: Optional[Union[np.ndarray, csr_matrix]] = None,
use_ctfidf: bool = True,
output_ndarray: bool = False,
)
| 177 | |
| 178 | |
def select_topic_representation(
    ctfidf_embeddings: Optional[Union[np.ndarray, csr_matrix]] = None,
    embeddings: Optional[Union[np.ndarray, csr_matrix]] = None,
    use_ctfidf: bool = True,
    output_ndarray: bool = False,
) -> Tuple[Union[np.ndarray, csr_matrix], bool]:
    """Select the topic representation.

    The preferred representation is chosen with `use_ctfidf`. If the preferred
    one is missing (None), a warning is logged and the other representation is
    used instead.

    Arguments:
        ctfidf_embeddings: The c-TF-IDF embedding matrix
        embeddings: The topic embedding matrix
        use_ctfidf: Whether to use the c-TF-IDF representation. If False, the topic embedding
                    representation is used, if it exists. Default is True.
        output_ndarray: Whether to convert the selected representation into an ndarray.
                        Only `scipy.sparse.csr_matrix` inputs are converted (via `toarray()`);
                        any other object is returned unchanged. Input types are not validated.

    Raises:
        ValueError: If no topic representation was found, i.e. both `ctfidf_embeddings`
                    and `embeddings` are None.

    Returns:
        The selected topic representation and a boolean indicating whether it is c-TF-IDF.
    """
    # Fail loudly instead of returning `(None, flag)`: with nothing to select,
    # the "Defaulting to ..." warnings below would be misleading and the
    # returned flag meaningless.
    if ctfidf_embeddings is None and embeddings is None:
        raise ValueError(
            "No topic representation was found: both `ctfidf_embeddings` and `embeddings` are None."
        )

    # From here on at least one representation exists, so every fallback
    # below yields a non-None value. The logger is only created on the
    # fallback paths, where a warning is actually emitted.
    if use_ctfidf:
        if ctfidf_embeddings is not None:
            selected, ctfidf_used = ctfidf_embeddings, True
        else:
            MyLogger().warning(
                "No c-TF-IDF matrix was found despite it is supposed to be used (`use_ctfidf` is True). "
                "Defaulting to semantic embeddings."
            )
            selected, ctfidf_used = embeddings, False
    elif embeddings is not None:
        selected, ctfidf_used = embeddings, False
    else:
        MyLogger().warning(
            "No topic embeddings were found despite they are supposed to be used (`use_ctfidf` is False). "
            "Defaulting to c-TF-IDF representation."
        )
        selected, ctfidf_used = ctfidf_embeddings, True

    # Densify only on request, and only for CSR matrices.
    if output_ndarray and isinstance(selected, csr_matrix):
        selected = selected.toarray()

    return selected, ctfidf_used
| 229 | |
| 230 | |
| 231 | class MockPlotlyModule: |