Visualize topics, their sizes, and their corresponding words. This visualization is heavily inspired by LDAvis, a great visualization technique typically reserved for LDA. Arguments: topic_model: A fitted BERTopic instance. topics: A selection of topics to visualize.
(
topic_model,
topics: List[int] | None = None,
top_n_topics: int | None = None,
use_ctfidf: bool = False,
custom_labels: Union[bool, str] = False,
title: str = "<b>Intertopic Distance Map</b>",
width: int = 650,
height: int = 650,
)
| 16 | |
| 17 | |
| 18 | def visualize_topics( |
| 19 | topic_model, |
| 20 | topics: List[int] | None = None, |
| 21 | top_n_topics: int | None = None, |
| 22 | use_ctfidf: bool = False, |
| 23 | custom_labels: Union[bool, str] = False, |
| 24 | title: str = "<b>Intertopic Distance Map</b>", |
| 25 | width: int = 650, |
| 26 | height: int = 650, |
| 27 | ) -> go.Figure: |
| 28 | """Visualize topics, their sizes, and their corresponding words. |
| 29 | |
| 30 | This visualization is highly inspired by LDAvis, a great visualization |
| 31 | technique typically reserved for LDA. |
| 32 | |
| 33 | Arguments: |
| 34 | topic_model: A fitted BERTopic instance. |
| 35 | topics: A selection of topics to visualize |
| 36 | top_n_topics: Only select the top n most frequent topics |
| 37 | use_ctfidf: Whether to use c-TF-IDF representations instead of the embeddings from the embedding model. |
| 38 | custom_labels: If bool, whether to use custom topic labels that were defined using |
| 39 | `topic_model.set_topic_labels`. |
| 40 | If `str`, it uses labels from other aspects, e.g., "Aspect1". |
| 41 | title: Title of the plot. |
| 42 | width: The width of the figure. |
| 43 | height: The height of the figure. |
| 44 | |
| 45 | Examples: |
| 46 | To visualize the topics simply run: |
| 47 | |
| 48 | ```python |
| 49 | topic_model.visualize_topics() |
| 50 | ``` |
| 51 | |
| 52 | Or if you want to save the resulting figure: |
| 53 | |
| 54 | ```python |
| 55 | fig = topic_model.visualize_topics() |
| 56 | fig.write_html("path/to/file.html") |
| 57 | ``` |
| 58 | <iframe src="../../getting_started/visualization/viz.html" |
| 59 | style="width:1000px; height: 680px; border: 0px;"></iframe> |
| 60 | """ |
| 61 | # Select topics based on top_n and topics args |
| 62 | freq_df = topic_model.get_topic_freq() |
| 63 | freq_df = freq_df.loc[freq_df.Topic != -1, :] |
| 64 | if topics is not None: |
| 65 | topics = list(topics) |
| 66 | elif top_n_topics is not None: |
| 67 | topics = sorted(freq_df.Topic.to_list()[:top_n_topics]) |
| 68 | else: |
| 69 | topics = sorted(freq_df.Topic.to_list()) |
| 70 | |
| 71 | # Extract topic words and their frequencies |
| 72 | topic_list = sorted(topics) |
| 73 | frequencies = [topic_model.topic_sizes_[topic] for topic in topic_list] |
| 74 | if isinstance(custom_labels, str): |
| 75 | words = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in topic_list] |
nothing calls this directly
no test coverage detected