MCPcopy
hub / github.com/llmware-ai/llmware / __init__

Method __init__

llmware/retrieval.py:116–263  ·  view source on GitHub ↗
(self, library, embedding_model=None, tokenizer=None, vector_db_api_key=None,
                 query_id=None, from_hf=False, from_sentence_transformer=False,embedding_model_name=None,
                 save_history=True, query_mode=None, vector_db=None, model_api_key=None)

Source from the content-addressed store, hash-verified

114 """
115
116 def __init__(self, library, embedding_model=None, tokenizer=None, vector_db_api_key=None,
117 query_id=None, from_hf=False, from_sentence_transformer=False,embedding_model_name=None,
118 save_history=True, query_mode=None, vector_db=None, model_api_key=None):
119
120 # load user profile & instantiate core library assets linked to profile
121
122 self.library = library
123
124 if library:
125 self.library_name = library.library_name
126 self.account_name = library.account_name
127 else:
128 # throw error if library object does not have library_name and account_name attributes
129 raise LLMWareException(message= f"Query - init - library object not found - {library}")
130
131 # explicitly pass name of embedding model, if multiple embeddings on library
132 self.embedding_model_name = embedding_model_name
133
134 # added option to pass embedding_model and tokenizer
135 self.user_passed_model = embedding_model
136 self.user_passed_tokenizer = tokenizer
137 self.from_hf = from_hf
138 self.from_sentence_transformer = from_sentence_transformer
139
140 # edge case - if a user tries to load a sentence_transformer model but does not pass a model name
141 if from_sentence_transformer and not embedding_model_name:
142 raise LLMWareException(message=f"Query - init - to use sentence_transformers, please "
143 f"provide the model name directly to load")
144
145 # load default configs
146 # embedding initialization parameters
147 self.query_embedding = None
148 self.embedding_model = None
149 self.embedding_db = None
150 self.embeddings = None
151 self.model_api_key = model_api_key
152
153 if self.library:
154 self.embeddings = EmbeddingHandler(self.library)
155
156 self.semantic_distance_threshold = 1000 # basic shut off at such a high level
157
158 # keys that will be included in query results
159
160 # full list
161 self.query_result_standard_keys = ["_id", "text", "doc_ID", "block_ID","page_num","content_type",
162 "author_or_speaker", "special_field1", "file_source","added_to_collection",
163 "table", "coords_x", "coords_y", "coords_cx", "coords_cy", "external_files",
164 "score", "similarity", "distance", "matches"]
165
166 # short_list
167 self.query_result_short_keys = ["text", "file_source", "page_num", "score", "distance","matches"]
168
169 # minimum_list
170 self.query_result_min_required_keys = ["text", "file_source", "page_num"]
171
172 # default - set at 'full list'
173 self.query_result_return_keys = self.query_result_standard_keys

Callers

nothing calls this directly

Calls 9

load_embedding_model Method · 0.95
load_query_state Method · 0.95
LLMWareException Class · 0.90
EmbeddingHandler Class · 0.90
QueryState Class · 0.90
LLMWareConfig Class · 0.90
issue_new_query_id Method · 0.80
get_query_path Method · 0.80
get_embedding_status Method · 0.45

Tested by

no test coverage detected