MCPcopy
hub / github.com/MaartenGr/BERTopic / BaseEmbedder

Class BaseEmbedder

bertopic/backend/_base.py:5–62  ·  view source on GitHub ↗

The Base Embedder used for creating embedding models. Arguments: `embedding_model` — the main embedding model to be used for extracting document and word embeddings; `word_embedding_model` — the embedding model used for extracting word embeddings only.

Source from the content-addressed store, hash-verified

3
4
class BaseEmbedder:
    """The Base Embedder used for creating embedding models.

    This class stores the configured model(s) and defines the embedding
    interface. Its own `embed` performs no work and returns `None`;
    `embed_words` and `embed_documents` both delegate to `embed`.

    Arguments:
        embedding_model: The main embedding model to be used for extracting
                         document and word embeddings
        word_embedding_model: The embedding model used for extracting word
                              embeddings only. If this model is selected,
                              then the `embedding_model` is purely used for
                              creating document embeddings.
    """

    def __init__(self, embedding_model=None, word_embedding_model=None):
        # Both models are stored as given; no validation or loading happens here.
        self.embedding_model = embedding_model
        self.word_embedding_model = word_embedding_model

    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
        """Embed a list of n documents/words into an (n, m) matrix of embeddings.

        Arguments:
            documents: A list of documents or words to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Document/words embeddings with shape (n, m) with `n` documents/words
            that each have an embeddings size of `m`.

            NOTE: this base implementation is a placeholder and returns
            `None` despite the annotation; presumably concrete backends
            override it to produce the array described above.
        """
        # Deliberately a no-op: returning None (rather than raising) is kept
        # so that existing callers of a bare BaseEmbedder behave as before.
        return None

    def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray:
        """Embed a list of n words into an (n, m) matrix of embeddings.

        Arguments:
            words: A list of words to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Word embeddings with shape (n, m) with `n` words
            that each have an embeddings size of `m`. The value is
            whatever `self.embed` returns for the same arguments.
        """
        # `verbose` is forwarded positionally so that an overriding `embed`
        # may name its second parameter freely.
        return self.embed(words, verbose)

    def embed_documents(self, document: List[str], verbose: bool = False) -> np.ndarray:
        """Embed a list of n documents into an (n, m) matrix of embeddings.

        Arguments:
            document: A list of documents to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Document embeddings with shape (n, m) with `n` documents
            that each have an embeddings size of `m`. The value is
            whatever `self.embed` returns for the same arguments.
        """
        # `verbose` is forwarded positionally so that an overriding `embed`
        # may name its second parameter freely.
        return self.embed(document, verbose)

Callers 1

_create_model_from_filesFunction · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected