Get labels for each topic in a user-defined format. Arguments: `nr_words`: Top `n` words per topic to use. `topic_prefix`: Whether to use the topic ID as a prefix. If set to True, the topic ID will be separated using the `separator`.
(
self,
nr_words: int = 3,
topic_prefix: bool = True,
word_length: int | None = None,
separator: str = "_",
aspect: str | None = None,
)
| 2040 | self.custom_labels_ = custom_labels |
| 2041 | |
| 2042 | def generate_topic_labels( |
| 2043 | self, |
| 2044 | nr_words: int = 3, |
| 2045 | topic_prefix: bool = True, |
| 2046 | word_length: int | None = None, |
| 2047 | separator: str = "_", |
| 2048 | aspect: str | None = None, |
| 2049 | ) -> List[str]: |
| 2050 | """Get labels for each topic in a user-defined format. |
| 2051 | |
| 2052 | Arguments: |
| 2053 | nr_words: Top `n` words per topic to use |
| 2054 | topic_prefix: Whether to use the topic ID as a prefix. |
| 2055 | If set to True, the topic ID will be separated |
| 2056 | using the `separator` |
| 2057 | word_length: The maximum length of each word in the topic label. |
| 2058 | Some words might be relatively long and setting this |
| 2059 | value helps to make sure that all labels have relatively |
| 2060 | similar lengths. |
| 2061 | separator: The string with which the words and topic prefix will be |
| 2062 | separated. Underscores are the default but a nice alternative |
| 2063 | is `", "`. |
| 2064 | aspect: The aspect from which to generate topic labels |
| 2065 | |
| 2066 | Returns: |
| 2067 | topic_labels: A list of topic labels sorted from the lowest topic ID to the highest. |
| 2068 | If the topic model was trained using HDBSCAN, the lowest topic ID is -1, |
| 2069 | otherwise it is 0. |
| 2070 | |
| 2071 | Examples: |
| 2072 | To create our custom topic labels, usage is rather straightforward: |
| 2073 | |
| 2074 | ```python |
| 2075 | topic_labels = topic_model.generate_topic_labels(nr_words=2, separator=", ") |
| 2076 | ``` |
| 2077 | """ |
| 2078 | unique_topics = sorted(set(self.topics_)) |
| 2079 | |
| 2080 | topic_labels = [] |
| 2081 | for topic in unique_topics: |
| 2082 | if aspect: |
| 2083 | words, _ = zip(*self.topic_aspects_[aspect][topic]) |
| 2084 | else: |
| 2085 | words, _ = zip(*self.get_topic(topic)) |
| 2086 | |
| 2087 | if word_length: |
| 2088 | words = [word[:word_length] for word in words][:nr_words] |
| 2089 | else: |
| 2090 | words = list(words)[:nr_words] |
| 2091 | |
| 2092 | if topic_prefix: |
| 2093 | topic_label = f"{topic}{separator}" + separator.join(words) |
| 2094 | else: |
| 2095 | topic_label = separator.join(words) |
| 2096 | |
| 2097 | topic_labels.append(topic_label) |
| 2098 | |
| 2099 | return topic_labels |