MCPcopy
hub / github.com/MaartenGr/BERTopic / get_topic_info

Method get_topic_info

bertopic/_bertopic.py:1650–1701  ·  view source on GitHub ↗

Get information about each topic, including its ID, frequency, and name. Arguments: `topic` — a specific topic for which you want the information (all topics are returned when omitted). Returns: `info` — a DataFrame with the information relating to either a single topic or all topics. Example: `info_df = topic_model.get_topic_info()`

(self, topic: int | None = None)

Source from the content-addressed store, hash-verified

1648 return False
1649
def get_topic_info(self, topic: int | None = None) -> pd.DataFrame:
    """Get information about each topic including its ID, frequency, and name.

    The returned frame has one row per topic, sorted by topic ID, with the
    columns `Topic`, `Count`, `Name` and `Representation`. The columns
    `CustomName`, one column per topic aspect, `Representative_Docs` and
    `Representative_Images` are added when the corresponding attributes
    are set on the model.

    Arguments:
        topic: A specific topic for which you want the information. If None,
               the information of all topics is returned.

    Returns:
        info: The information relating to either a single topic or all topics.
              If `topic` does not occur in the model, the frame is empty.

    Examples:
    ```python
    info_df = topic_model.get_topic_info()
    ```

    To get the information of a single topic:

    ```python
    info_df = topic_model.get_topic_info(topic=0)
    ```
    """
    check_is_fitted(self)

    info = pd.DataFrame(self.topic_sizes_.items(), columns=["Topic", "Count"]).sort_values("Topic")
    info["Name"] = info.Topic.map(self.topic_labels_)

    # Custom label: only applied when there is exactly one label per topic.
    # Positions in `custom_labels_` are shifted by `self._outliers` to obtain topic IDs.
    if self.custom_labels_ is not None and len(self.custom_labels_) == len(info):
        labels = {index - self._outliers: label for index, label in enumerate(self.custom_labels_)}
        info["CustomName"] = info["Topic"].map(labels)

    # Main Keywords: keep only the first element (the word) of every entry.
    # The `()` default makes an empty representation yield [] instead of
    # leaking a StopIteration out of the comprehension.
    keywords = {
        topic_id: list(next(zip(*words), ()))
        for topic_id, words in self.topic_representations_.items()
    }
    info["Representation"] = info["Topic"].map(keywords)

    # Extract all topic aspects
    if self.topic_aspects_:
        for aspect, aspect_values in self.topic_aspects_.items():
            candidates = list(aspect_values.values())

            # The layout of an aspect is inferred from its last value, and only
            # list values are post-processed; anything else is mapped as-is.
            if candidates and isinstance(candidates[-1], list):
                # Probe the nearest non-empty list (the last value itself whenever
                # it has content) so an empty trailing list cannot raise IndexError.
                probe = next(
                    (value for value in reversed(candidates) if isinstance(value, list) and value),
                    None,
                )
                first = probe[0] if probe else None

                if isinstance(first, (tuple, list)):
                    # Entries are sequences: keep the first element of each
                    aspect_values = {
                        topic_id: list(next(zip(*value), ())) for topic_id, value in aspect_values.items()
                    }
                elif isinstance(first, str):
                    # Plain strings: collapse them into a single string per topic
                    aspect_values = {
                        topic_id: " ".join(value).strip() for topic_id, value in aspect_values.items()
                    }
            info[aspect] = info["Topic"].map(aspect_values)

    # Representative Docs / Images
    if self.representative_docs_ is not None:
        info["Representative_Docs"] = info["Topic"].map(self.representative_docs_)
    if self.representative_images_ is not None:
        info["Representative_Images"] = info["Topic"].map(self.representative_images_)

    # Select specific topic to return
    if topic is not None:
        info = info.loc[info.Topic == topic, :]

    return info.reset_index(drop=True)
1702
1703 def get_topic_freq(self, topic: int | None = None) -> Union[pd.DataFrame, int]:
1704 """Return the size of topics (descending order).

Callers 8

get_document_infoMethod · 0.95
set_topic_labelsMethod · 0.95
test_full_modelFunction · 0.80
test_mergeFunction · 0.80
test_deleteFunction · 0.80
test_get_topicFunction · 0.80
test_get_topic_infoFunction · 0.80
visualize_term_rankFunction · 0.80

Calls 1

check_is_fittedFunction · 0.90

Tested by 5

test_full_modelFunction · 0.64
test_mergeFunction · 0.64
test_deleteFunction · 0.64
test_get_topicFunction · 0.64
test_get_topic_infoFunction · 0.64