MCPcopy
hub / github.com/MaartenGr/BERTopic / _reduce_dimensionality

Method _reduce_dimensionality

bertopic/_bertopic.py:3907–3956  ·  view source on GitHub ↗

Reduce dimensionality of embeddings using UMAP and train a UMAP model. Arguments: embeddings: The extracted embeddings using the sentence transformer module. y: The target class for (semi)-supervised dimensionality reduction. partial_fit: Whether to run `partial_fit` for online learning. Returns: umap_embeddings: The reduced embeddings.

(
        self,
        embeddings: Union[np.ndarray, csr_matrix],
        y: Union[List[int], np.ndarray] = None,
        partial_fit: bool = False,
    )

Source from the content-addressed store, hash-verified

3905 return mapped_predictions
3906
def _reduce_dimensionality(
    self,
    embeddings: Union[np.ndarray, csr_matrix],
    y: Union[List[int], np.ndarray] = None,
    partial_fit: bool = False,
) -> np.ndarray:
    """Reduce dimensionality of embeddings using UMAP and train a UMAP model.

    The model stored in `self.umap_model` is used as-is; it only needs to
    expose `transform` plus either `fit_transform` or `fit` (and optionally
    `partial_fit` for online learning).

    Arguments:
        embeddings: The extracted embeddings using the sentence transformer module.
        y: The target class for (semi)-supervised dimensionality reduction.
           Only used in the regular (non-partial) fit. If the model rejects
           it with a `TypeError`, the fit is retried without `y`.
        partial_fit: Whether to run `partial_fit` for online learning

    Returns:
        umap_embeddings: The reduced embeddings, with NaN/inf entries replaced
                         through `np.nan_to_num`.
    """

    def _fit_then_transform(**fit_kwargs):
        # Prefer the combined `fit_transform` when the model offers it;
        # otherwise fall back to a separate `fit` followed by `transform`.
        if hasattr(self.umap_model, "fit_transform"):
            return self.umap_model.fit_transform(embeddings, **fit_kwargs)
        self.umap_model.fit(embeddings, **fit_kwargs)
        return self.umap_model.transform(embeddings)

    logger.info("Dimensionality - Fitting the dimensionality reduction algorithm")
    # Partial fit
    if partial_fit:
        if hasattr(self.umap_model, "partial_fit"):
            # The model is rebound to whatever `partial_fit` returns.
            self.umap_model = self.umap_model.partial_fit(embeddings)
            umap_embeddings = self.umap_model.transform(embeddings)
        elif self.topic_representations_ is None:
            # No `partial_fit` support and no topic representations yet:
            # do a full (unsupervised) fit on this batch.
            umap_embeddings = _fit_then_transform()
        else:
            # Topic representations already exist: only project the new batch.
            umap_embeddings = self.umap_model.transform(embeddings)

    # Regular fit
    else:
        try:
            # cuml umap needs y to be an numpy array
            y = np.array(y) if y is not None else None
            umap_embeddings = _fit_then_transform(y=y)
        except TypeError:
            # The model does not accept `y`. Retry WITHOUT it; retrying with
            # the same keyword would just raise the same TypeError again.
            umap_embeddings = _fit_then_transform()

    logger.info("Dimensionality - Completed \u2713")
    return np.nan_to_num(umap_embeddings)
3957
3958 def _cluster_embeddings(
3959 self,

Callers 4

fit_transformMethod · 0.95
partial_fitMethod · 0.95

Calls 5

infoMethod · 0.80
fit_transformMethod · 0.80
partial_fitMethod · 0.45
transformMethod · 0.45
fitMethod · 0.45

Tested by 2