(self, library, embedding_model=None, tokenizer=None, vector_db_api_key=None,
query_id=None, from_hf=False, from_sentence_transformer=False,embedding_model_name=None,
save_history=True, query_mode=None, vector_db=None, model_api_key=None)
| 114 | """ |
| 115 | |
| 116 | def __init__(self, library, embedding_model=None, tokenizer=None, vector_db_api_key=None, |
| 117 | query_id=None, from_hf=False, from_sentence_transformer=False,embedding_model_name=None, |
| 118 | save_history=True, query_mode=None, vector_db=None, model_api_key=None): |
| 119 | |
| 120 | # load user profile & instantiate core library assets linked to profile |
| 121 | |
| 122 | self.library = library |
| 123 | |
| 124 | if library: |
| 125 | self.library_name = library.library_name |
| 126 | self.account_name = library.account_name |
| 127 | else: |
| 128 | # no library object was passed (None or otherwise falsy) - raise an error |
| 129 | raise LLMWareException(message= f"Query - init - library object not found - {library}") |
| 130 | |
| 131 | # explicitly pass name of embedding model, if multiple embeddings on library |
| 132 | self.embedding_model_name = embedding_model_name |
| 133 | |
| 134 | # added option to pass embedding_model and tokenizer |
| 135 | self.user_passed_model = embedding_model |
| 136 | self.user_passed_tokenizer = tokenizer |
| 137 | self.from_hf = from_hf |
| 138 | self.from_sentence_transformer = from_sentence_transformer |
| 139 | |
| 140 | # edge case - if a user tries to load a sentence_transformer model but does not pass a model name |
| 141 | if from_sentence_transformer and not embedding_model_name: |
| 142 | raise LLMWareException(message=f"Query - init - to use sentence_transformers, please " |
| 143 | f"provide the model name directly to load") |
| 144 | |
| 145 | # load default configs |
| 146 | # embedding initialization parameters |
| 147 | self.query_embedding = None |
| 148 | self.embedding_model = None |
| 149 | self.embedding_db = None |
| 150 | self.embeddings = None |
| 151 | self.model_api_key = model_api_key |
| 152 | |
| 153 | if self.library: |
| 154 | self.embeddings = EmbeddingHandler(self.library) |
| 155 | |
| 156 | self.semantic_distance_threshold = 1000 # basic shut off at such a high level |
| 157 | |
| 158 | # keys that will be included in query results |
| 159 | |
| 160 | # full list |
| 161 | self.query_result_standard_keys = ["_id", "text", "doc_ID", "block_ID","page_num","content_type", |
| 162 | "author_or_speaker", "special_field1", "file_source","added_to_collection", |
| 163 | "table", "coords_x", "coords_y", "coords_cx", "coords_cy", "external_files", |
| 164 | "score", "similarity", "distance", "matches"] |
| 165 | |
| 166 | # short_list |
| 167 | self.query_result_short_keys = ["text", "file_source", "page_num", "score", "distance","matches"] |
| 168 | |
| 169 | # minimum_list |
| 170 | self.query_result_min_required_keys = ["text", "file_source", "page_num"] |
| 171 | |
| 172 | # default - set at 'full list' |
| 173 | self.query_result_return_keys = self.query_result_standard_keys |
nothing calls this directly
no test coverage detected