MCPcopy
hub / github.com/MaartenGr/BERTopic / _map_probabilities

Method _map_probabilities

bertopic/_bertopic.py:4767–4802  ·  view source on GitHub ↗

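Map the probabilities to the reduced topics. This is achieved by adding together the probabilities of all topics that are mapped to the same topic. Then, the topics that were mapped from are set to 0 as they were reduced. Arguments: probabilities: An array containing probabilities. original_topics: Whether we want to map from the original topics to the most recent topics or from the second-most recent topics. Returns: mapped_probabilities: Updated probabilities.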

(
        self, probabilities: Union[np.ndarray, None], original_topics: bool = False
    )

Source from the content-addressed store, hash-verified

4765 return documents
4766
4767 def _map_probabilities(
4768 self, probabilities: Union[np.ndarray, None], original_topics: bool = False
4769 ) -> Union[np.ndarray, None]:
4770 """Map the probabilities to the reduced topics.
4771 This is achieved by adding together the probabilities
4772 of all topics that are mapped to the same topic. Then,
4773 the topics that were mapped from are set to 0 as they
4774 were reduced.
4775
4776 Arguments:
4777 probabilities: An array containing probabilities
4778 original_topics: Whether we want to map from the
4779 original topics to the most recent topics
4780 or from the second-most recent topics.
4781
4782 Returns:
4783 mapped_probabilities: Updated probabilities
4784 """
4785 mappings = self.topic_mapper_.get_mappings(original_topics)
4786
4787 # Map array of probabilities (probability for assigned topic per document)
4788 if probabilities is not None:
4789 if len(probabilities.shape) == 2:
4790 mapped_probabilities = np.zeros(
4791 (
4792 probabilities.shape[0],
4793 len(set(mappings.values())) - self._outliers,
4794 )
4795 )
4796 for from_topic, to_topic in mappings.items():
4797 if to_topic != -1 and from_topic != -1:
4798 mapped_probabilities[:, to_topic] += probabilities[:, from_topic]
4799
4800 return mapped_probabilities
4801
4802 return probabilities
4803
4804 def _preprocess_text(self, documents: np.ndarray) -> List[str]:
4805 r"""Basic preprocessing of text.

Callers 5

fit_transformMethod · 0.95
transformMethod · 0.95
merge_topicsMethod · 0.95
delete_topicsMethod · 0.95
reduce_topicsMethod · 0.95

Calls 1

get_mappingsMethod · 0.80

Tested by

no test coverage detected