Delete topics from the topic model. Deleted topics are mapped to -1 (the outlier topic), and core topic attributes such as topic embeddings and c-TF-IDF are updated automatically. Arguments: `topics_to_delete` – list of topic IDs to delete.
(
self,
topics_to_delete: List[int],
)
| 2175 | self.probabilities_ = self._map_probabilities(self.probabilities_) |
| 2176 | |
| 2177 | def delete_topics( |
| 2178 | self, |
| 2179 | topics_to_delete: List[int], |
| 2180 | ) -> None: |
| 2181 | """Delete topics from the topic model. |
| 2182 | |
| 2183 | The deleted topics will be mapped to -1 (outlier topic). Core topic attributes |
| 2184 | like topic embeddings and c-TF-IDF will be automatically updated. |
| 2185 | |
| 2186 | Arguments: |
| 2187 | topics_to_delete: List of topics to delete |
| 2188 | """ |
| 2189 | check_is_fitted(self) |
| 2190 | |
| 2191 | topics_df = pd.DataFrame({"Topic": self.topics_}) |
| 2192 | |
| 2193 | # Check if -1 exists in the current topics |
| 2194 | had_outliers = -1 in set(self.topics_) |
| 2195 | |
| 2196 | # If adding -1 for the first time, initialize its attributes |
| 2197 | if not had_outliers and any(topic in topics_to_delete for topic in self.topics_): |
| 2198 | # Initialize c-TF-IDF for -1 topic (zeros) |
| 2199 | outlier_row = np.zeros((1, self.c_tf_idf_.shape[1])) |
| 2200 | outlier_row = sp.csr_matrix(outlier_row) |
| 2201 | self.c_tf_idf_ = sp.vstack([outlier_row, self.c_tf_idf_]) |
| 2202 | |
| 2203 | # Initialize topic embeddings for -1 topic (zeros) |
| 2204 | outlier_embedding = np.zeros((1, self.topic_embeddings_.shape[1])) |
| 2205 | self.topic_embeddings_ = np.vstack([outlier_embedding, self.topic_embeddings_]) |
| 2206 | |
| 2207 | # Initialize topic representations for -1 topic: ("", 1e-05) |
| 2208 | self.topic_representations_[-1] = [("", 1e-05)] |
| 2209 | |
| 2210 | # Initialize representative docs for -1 topic (empty list) |
| 2211 | self.representative_docs_[-1] = [] |
| 2212 | |
| 2213 | # Initialize representative images for -1 topic if images are being used |
| 2214 | if self.representative_images_ is not None: |
| 2215 | outlier_image = np.zeros((1, self.representative_images_.shape[1])) |
| 2216 | self.representative_images_ = np.vstack([outlier_image, self.representative_images_]) |
| 2217 | |
| 2218 | # Initialize custom labels for -1 topic if they exist |
| 2219 | if hasattr(self, "custom_labels_") and self.custom_labels_ is not None: |
| 2220 | self.custom_labels_[-1] = "" |
| 2221 | |
| 2222 | # Initialize ctfidf model diagonal for -1 topic (ones) if it exists |
| 2223 | if hasattr(self, "ctfidf_model") and self.ctfidf_model is not None: |
| 2224 | n_features = self.ctfidf_model._idf_diag.shape[1] |
| 2225 | outlier_diag = sp.csr_matrix(([1.0], ([0], [0])), shape=(1, n_features)) |
| 2226 | self.ctfidf_model._idf_diag = sp.vstack([outlier_diag, self.ctfidf_model._idf_diag]) |
| 2227 | |
| 2228 | # Initialize topic aspects for -1 topic (empty dict for each aspect) if they exist |
| 2229 | if hasattr(self, "topic_aspects_") and self.topic_aspects_ is not None: |
| 2230 | for aspect in self.topic_aspects_: |
| 2231 | self.topic_aspects_[aspect][-1] = {} |
| 2232 | |
| 2233 | # First map deleted topics to -1 |
| 2234 | mapping = {topic: -1 if topic in topics_to_delete else topic for topic in set(self.topics_)} |