Implements the interface for managing a collection of unstructured information as a ``Library``. A library is an indexed collection of texts, tables, and images extracted from parsed files. Returns: library (``Library``): a new ``Library`` object.
| 36 | |
| 37 | |
| 38 | class Library: |
| 39 | |
| 40 | """Implements the interface to manage a collection of unstructured information as a ``Library``, i.e. a |
| 41 | library is an indexed collection of texts, tables and images extracted from parsed files. |
| 42 | |
| 43 | Returns |
| 44 | ------- |
| 45 | library : Library |
| 46 | A new ``Library`` object. |
| 47 | """ |
| 48 | |
| 49 | def __init__(self): |
| 50 | |
| 51 | # default settings for basic parameters |
| 52 | self.account_name = None |
| 53 | self.library_name = None |
| 54 | |
| 55 | # base file paths in each library |
| 56 | self.library_main_path = None |
| 57 | |
| 58 | # each of these paths hangs off library_main_path |
| 59 | self.file_copy_path = None |
| 60 | self.image_path = None |
| 61 | self.dataset_path = None |
| 62 | self.nlp_path = None |
| 63 | self.output_path = None |
| 64 | self.tmp_path = None |
| 65 | self.embedding_path = None |
| 66 | |
| 67 | # default key structure of block -> re-order for nicer display |
| 68 | self.default_keys = ["block_ID", "doc_ID", "content_type", "file_type","master_index","master_index2", |
| 69 | "coords_x", "coords_y", "coords_cx", "coords_cy", "author_or_speaker", "modified_date", |
| 70 | "created_date", "creator_tool", "added_to_collection", "file_source", |
| 71 | # changing to 'table_block' and 'text_block' |
| 72 | "table_block", "external_files", "text_block", "header_text", "text_search", |
| 73 | "user_tags", "special_field1", "special_field2", "special_field3","graph_status","dialog", |
| 74 | "embedding_flags"] |
| 75 | |
| 76 | self.library_block_schema = LLMWareTableSchema().get_block_schema() |
| 77 | |
| 78 | # default library card elements |
| 79 | self.default_library_card = ["library_name", "embedding_status", "embedding_model", "embedding_db", |
| 80 | "embedded_blocks", "embedding_dims", "time_stamp", |
| 81 | "knowledge_graph", "unique_doc_id", "documents", "blocks", "images", "pages", |
| 82 | "tables"] |
| 83 | |
| 84 | self.block_size_target_characters = 400 |
| 85 | |
| 86 | # attributes used in parsing workflow |
| 87 | self.doc_ID = 0 |
| 88 | self.block_ID = 0 |
| 89 | |
| 90 | # check for llmware path & create if not already set up |
| 91 | if not os.path.exists(LLMWareConfig.get_llmware_path()): |
| 92 | |
| 93 | # if the workspace has not been explicitly set up by the user, create the folder directory structure |
| 94 | LLMWareConfig.setup_llmware_workspace() |
| 95 |
no outgoing calls