Select an embedding model based on language or a specific provided model. When selecting a language, we choose all-MiniLM-L6-v2 for English and paraphrase-multilingual-MiniLM-L12-v2 for all other languages, as it supports 100+ languages. If sentence-transformers is not installed, in the case of a lightweight installation, a scikit-learn backend is used by default.
(embedding_model, language: str | None = None, verbose: bool = False)
| 69 | |
| 70 | |
| 71 | def select_backend(embedding_model, language: str | None = None, verbose: bool = False) -> BaseEmbedder: |
| 72 | """Select an embedding model based on language or a specific provided model. |
| 73 | When selecting a language, we choose all-MiniLM-L6-v2 for English and |
| 74 | paraphrase-multilingual-MiniLM-L12-v2 for all other languages as it supports 100+ languages. |
| 75 | If sentence-transformers is not installed, in the case of a lightweight installation, |
| 76 | a scikit-learn backend is default. |
| 77 | |
| 78 | Returns: |
| 79 | model: The selected model backend. |
| 80 | """ |
| 81 | logger.set_level("INFO" if verbose else "WARNING") |
| 82 | |
| 83 | # BERTopic language backend |
| 84 | if isinstance(embedding_model, BaseEmbedder): |
| 85 | return embedding_model |
| 86 | |
| 87 | # Scikit-learn backend |
| 88 | if isinstance(embedding_model, ScikitPipeline): |
| 89 | return SklearnEmbedder(embedding_model) |
| 90 | |
| 91 | # Flair word embeddings |
| 92 | if "flair" in str(type(embedding_model)): |
| 93 | from bertopic.backend._flair import FlairBackend |
| 94 | |
| 95 | return FlairBackend(embedding_model) |
| 96 | |
| 97 | # Spacy embeddings |
| 98 | if "spacy" in str(type(embedding_model)): |
| 99 | from bertopic.backend._spacy import SpacyBackend |
| 100 | |
| 101 | return SpacyBackend(embedding_model) |
| 102 | |
| 103 | # Gensim embeddings |
| 104 | if "gensim" in str(type(embedding_model)): |
| 105 | from bertopic.backend._gensim import GensimBackend |
| 106 | |
| 107 | return GensimBackend(embedding_model) |
| 108 | |
| 109 | # USE embeddings |
| 110 | if "tensorflow" and "saved_model" in str(type(embedding_model)): |
| 111 | from bertopic.backend._use import USEBackend |
| 112 | |
| 113 | return USEBackend(embedding_model) |
| 114 | |
| 115 | # Sentence Transformer embeddings |
| 116 | if "sentence_transformers" in str(type(embedding_model)) or isinstance(embedding_model, str): |
| 117 | from ._sentencetransformers import SentenceTransformerBackend |
| 118 | |
| 119 | return SentenceTransformerBackend(embedding_model) |
| 120 | |
| 121 | # Hugging Face embeddings |
| 122 | if "transformers" and "pipeline" in str(type(embedding_model)): |
| 123 | from ._hftransformers import HFTransformerBackend |
| 124 | |
| 125 | return HFTransformerBackend(embedding_model) |
| 126 | |
| 127 | # Model2Vec embeddings |
| 128 | if "model2vec" in str(type(embedding_model)): |
no test coverage detected