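Arguments:

- docs: The documents you used when calling either `fit` or `fit_transform`.
- topics_to_merge: Either a list of topics or a list of lists of topics to merge. For example, `[1, 2, 3]` will merge topics 1, 2 and 3, while `[[1, 2], [3, 4]]` will merge topics 1 and 2, and separately merge topics 3 and 4.
- images: A list of paths to the images used when calling either `fit` or `fit_transform`.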
(
self,
docs: List[str],
topics_to_merge: List[Union[Iterable[int], int]],
images: List[str] | None = None,
)
| 2099 | return topic_labels |
| 2100 | |
| 2101 | def merge_topics( |
| 2102 | self, |
| 2103 | docs: List[str], |
| 2104 | topics_to_merge: List[Union[Iterable[int], int]], |
| 2105 | images: List[str] | None = None, |
| 2106 | ) -> None: |
| 2107 | """Arguments: |
| 2108 | docs: The documents you used when calling either `fit` or `fit_transform` |
| 2109 | topics_to_merge: Either a list of topics or a list of list of topics |
| 2110 | to merge. For example: |
| 2111 | [1, 2, 3] will merge topics 1, 2 and 3 |
| 2112 | [[1, 2], [3, 4]] will merge topics 1 and 2, and |
| 2113 | separately merge topics 3 and 4. |
| 2114 | images: A list of paths to the images used when calling either |
| 2115 | `fit` or `fit_transform`. |
| 2116 | |
| 2117 | Examples: |
| 2118 | If you want to merge topics 1, 2, and 3: |
| 2119 | |
| 2120 | ```python |
| 2121 | topics_to_merge = [1, 2, 3] |
| 2122 | topic_model.merge_topics(docs, topics_to_merge) |
| 2123 | ``` |
| 2124 | |
| 2125 | or if you want to merge topics 1 and 2, and separately |
| 2126 | merge topics 3 and 4: |
| 2127 | |
| 2128 | ```python |
| 2129 | topics_to_merge = [[1, 2], |
| 2130 | [3, 4]] |
| 2131 | topic_model.merge_topics(docs, topics_to_merge) |
| 2132 | ``` |
| 2133 | """ |
| 2134 | check_is_fitted(self) |
| 2135 | check_documents_type(docs) |
| 2136 | documents = pd.DataFrame( |
| 2137 | { |
| 2138 | "Document": docs, |
| 2139 | "Topic": self.topics_, |
| 2140 | "Image": images, |
| 2141 | "ID": range(len(docs)), |
| 2142 | } |
| 2143 | ) |
| 2144 | |
| 2145 | mapping = {topic: topic for topic in set(self.topics_)} |
| 2146 | if isinstance(topics_to_merge[0], int): |
| 2147 | for topic in sorted(topics_to_merge): |
| 2148 | mapping[topic] = topics_to_merge[0] |
| 2149 | elif isinstance(topics_to_merge[0], Iterable): |
| 2150 | for topic_group in sorted(topics_to_merge): |
| 2151 | for topic in topic_group: |
| 2152 | mapping[topic] = topic_group[0] |
| 2153 | else: |
| 2154 | raise ValueError("Make sure that `topics_to_merge` is eithera list of topics or a list of list of topics.") |
| 2155 | |
| 2156 | # Track mappings and sizes of topics for merging topic embeddings |
| 2157 | mappings = defaultdict(list) |
| 2158 | for key, val in sorted(mapping.items()): |