MCPcopy
hub / github.com/MaartenGr/BERTopic / select_backend

Function select_backend

bertopic/backend/_utils.py:71–171  ·  view source on GitHub ↗

Select an embedding model based on language or a specific provided model. When selecting a language, we choose all-MiniLM-L6-v2 for English and paraphrase-multilingual-MiniLM-L12-v2 for all other languages, as it supports 100+ languages. If sentence-transformers is not installed, as in the case of a lightweight installation, a scikit-learn backend is the default.

(embedding_model, language: str | None = None, verbose: bool = False)

Source from the content-addressed store, hash-verified

69
70
71def select_backend(embedding_model, language: str | None = None, verbose: bool = False) -> BaseEmbedder:
72 """Select an embedding model based on language or a specific provided model.
73 When selecting a language, we choose all-MiniLM-L6-v2 for English and
74 paraphrase-multilingual-MiniLM-L12-v2 for all other languages as it support 100+ languages.
75 If sentence-transformers is not installed, in the case of a lightweight installation,
76 a scikit-learn backend is default.
77
78 Returns:
79 model: The selected model backend.
80 """
81 logger.set_level("INFO" if verbose else "WARNING")
82
83 # BERTopic language backend
84 if isinstance(embedding_model, BaseEmbedder):
85 return embedding_model
86
87 # Scikit-learn backend
88 if isinstance(embedding_model, ScikitPipeline):
89 return SklearnEmbedder(embedding_model)
90
91 # Flair word embeddings
92 if "flair" in str(type(embedding_model)):
93 from bertopic.backend._flair import FlairBackend
94
95 return FlairBackend(embedding_model)
96
97 # Spacy embeddings
98 if "spacy" in str(type(embedding_model)):
99 from bertopic.backend._spacy import SpacyBackend
100
101 return SpacyBackend(embedding_model)
102
103 # Gensim embeddings
104 if "gensim" in str(type(embedding_model)):
105 from bertopic.backend._gensim import GensimBackend
106
107 return GensimBackend(embedding_model)
108
109 # USE embeddings
110 if "tensorflow" and "saved_model" in str(type(embedding_model)):
111 from bertopic.backend._use import USEBackend
112
113 return USEBackend(embedding_model)
114
115 # Sentence Transformer embeddings
116 if "sentence_transformers" in str(type(embedding_model)) or isinstance(embedding_model, str):
117 from ._sentencetransformers import SentenceTransformerBackend
118
119 return SentenceTransformerBackend(embedding_model)
120
121 # Hugging Face embeddings
122 if "transformers" and "pipeline" in str(type(embedding_model)):
123 from ._hftransformers import HFTransformerBackend
124
125 return HFTransformerBackend(embedding_model)
126
127 # Model2Vec embeddings
128 if "model2vec" in str(type(embedding_model)):

Callers 6

fit_transformMethod · 0.90
partial_fitMethod · 0.90
loadMethod · 0.90
merge_modelsMethod · 0.90
_create_model_from_filesFunction · 0.90
__init__Method · 0.90

Calls 11

SklearnEmbedderClass · 0.90
FlairBackendClass · 0.90
SpacyBackendClass · 0.90
GensimBackendClass · 0.90
USEBackendClass · 0.90
FastEmbedBackendClass · 0.90
Model2VecBackendClass · 0.85
set_levelMethod · 0.80
infoMethod · 0.80

Tested by

no test coverage detected