Provides callers with an interface on the status of the parsing and embedding process. ``Status`` is the central class for accessing (reading and writing) the status of processes. The intended use case is to be an interface for non-llmware components (the callers) that need informat
| 5173 | |
| 5174 | |
| 5175 | class Status: |
| 5176 | |
| 5177 | """Provides callers with an interface on the status of the parsing and embedding process. |
| 5178 | |
| 5179 | ``Status`` is the central class for accessing (reading and writing) the status of processes. |
| 5180 | The intended use case is to be an interface for non-llmware components (the callers) that need |
| 5181 | information on llmware progress, e.g., user interface components may need to change depending on the |
| 5182 | progress of parsing. A status consists of a summary string and metrics that can be used to provide |
| 5183 | graphical widgets an update. If a status is written to a SQL collection database, then it will use the |
| 5184 | Status schema defined in configs.py. |
| 5185 | |
| 5186 | Parameters |
| 5187 | ---------- |
| 5188 | account_name : str, optional, default='llmware' |
| 5189 | Sets the name of the account, which is used for writing and retrieving a status. |
| 5190 | |
| 5191 | Returns |
| 5192 | ------- |
| 5193 | status : Status |
| 5194 | A new ``Status`` object. |
| 5195 | |
| 5196 | """ |
| 5197 | |
def __init__(self, account_name="llmware"):
    """Set up the status interface for an account, creating the "status" table when required.

    Parameters
    ----------
    account_name : str, optional, default='llmware'
        Name of the account. It is stored on the instance and passed to every
        ``CollectionWriter`` constructed here.
    """

    self.account_name = account_name
    self.schema = LLMWareTableSchema.get_status_schema()

    table_name = "status"

    # Ask the underlying collection db whether the table still has to be built.
    build_required = CollectionWriter(
        table_name, account_name=self.account_name
    ).check_if_table_build_required()

    # The check and the creation each use their own freshly constructed writer.
    # NOTE(review): whether a single writer could be reused for both calls is
    # not visible from here - confirm before consolidating.
    if build_required:
        CollectionWriter(table_name, account_name=self.account_name).create_table(
            table_name, self.schema
        )
| 5207 | |
def get_pdf_parsing_status(self, library_name, job_id="0"):
    """Look up the status entry written by the PDF parser.

    Parameters
    ----------
    library_name : str
        Library name; forms the prefix of the status key.
    job_id : str, optional, default="0"
        Job identifier; forms the suffix of the status key.

    Returns
    -------
    status
        The result of ``CollectionRetrieval.lookup`` on the "status" collection
        for the key ``{library_name}_pdf_parser_{job_id}``.
    """

    lookup_key = f"{library_name}_pdf_parser_{job_id}"
    retriever = CollectionRetrieval("status", account_name=self.account_name)

    return retriever.lookup("key", lookup_key)
| 5216 | |
def get_office_parsing_status(self, library_name, job_id="0"):
    """Look up the status entry written by the Office parser.

    Parameters
    ----------
    library_name : str
        Library name; forms the prefix of the status key.
    job_id : str, optional, default="0"
        Job identifier; forms the suffix of the status key.

    Returns
    -------
    status
        The result of ``CollectionRetrieval.lookup`` on the "status" collection
        for the key ``{library_name}_office_parser_{job_id}``.
    """

    lookup_key = f"{library_name}_office_parser_{job_id}"
    retriever = CollectionRetrieval("status", account_name=self.account_name)

    return retriever.lookup("key", lookup_key)
| 5225 | |
| 5226 | # Return the status dict |
| 5227 | def get_embedding_status(self, library_name, embedding_model): |
| 5228 | |
| 5229 | """ Gets the embedding status written by the EmbeddingHandler class and each supported vector DB """ |
| 5230 | |
| 5231 | status_key = self._get_embedding_status_key(library_name, embedding_model) |
| 5232 | status = CollectionRetrieval("status", account_name=self.account_name).lookup("key", status_key) |
no outgoing calls