(library_name)
| 19 | |
| 20 | |
def rag(library_name):
    """Build a contracts library, embed it, and print semantic-search hits per contract.

    The visible steps are: create a library named ``library_name``, load the
    sample files, parse the "Agreements" folder into the library, install
    embeddings, run one semantic query, and print the hits - first as a flat
    list, then grouped by each file found in the agreements folder.

    Args:
        library_name: Name passed to ``Library().create_new_library`` and to
            ``Status().get_embedding_status``.

    Note:
        Within this listing the loaded ``prompter``, the ``query`` string and
        the per-contract ``qr`` lists are prepared but not consumed any
        further, and no ``return`` statement is visible.
    """

    # Step 0 - Configuration - both values are consumed in Step 4 when the embeddings are installed
    model_name = "industry-bert-contracts"
    db_name = "milvus"

    # Step 1 - Create the library, the main 'organizing construct' in llmware
    print("\nupdate: Step 1 - Creating library: {}".format(library_name))
    lib = Library().create_new_library(library_name)

    # Step 2 - Fetch the sample files via .load_sample_files()
    #   --note: pass 'over_write=True' to refresh a previously downloaded copy
    print("update: Step 2 - Downloading Sample Files")
    samples_root = Setup().load_sample_files(over_write=False)
    agreements_dir = os.path.join(samples_root, "Agreements")

    # Step 3 - Hand the agreements folder to .add_files, which parses and
    #   text-chunks the documents and stores them (in MongoDB, per the original notes)
    print("update: Step 3 - Parsing and Text Indexing Files")
    lib.add_files(input_folder_path=agreements_dir)

    # Step 4 - Install the embeddings using the Step 0 configuration
    print("\nupdate: Step 4 - Generating Embeddings in {} db - with Model- {}".format(db_name, model_name))
    lib.install_new_embedding(embedding_model_name=model_name, vector_db=db_name)

    # note: when llmware runs inside a larger application, Status() can be polled for progress
    #   --both the EmbeddingHandler and Parsers write to Status() at intervals while processing
    embedding_status = Status().get_embedding_status(library_name, model_name)
    print("update: Embeddings Complete - Status() check at end of embedding - ", embedding_status)

    print("\nupdate: Loading 1B parameter BLING model for LLM inference")
    prompter = Prompt().load_model("llmware/bling-1b-0.1")
    query = "what is the executive's base annual salary"

    # One semantic query over the whole library; the hits are reused for every contract below
    hits = Query(lib).semantic_query(query, result_count=50, embedding_distance_threshold=1.0)

    # Flat dump of every hit, in the order the query returned them
    for rank, hit in enumerate(hits):
        print("update: ", rank, hit["file_source"], hit["distance"], hit["text"])

    # Per-contract view: collect the hits whose source file is this contract
    for doc_idx, doc_name in enumerate(os.listdir(agreements_dir)):

        # Reset for every directory entry, including the skipped one
        qr = []

        # Skip the macOS Finder metadata file
        if doc_name == ".DS_Store":
            continue

        print("\nContract Name: ", doc_idx, doc_name)

        for rank, hit in enumerate(hits):
            if hit["file_source"] != doc_name:
                continue
            print("Top Retrieval: ", rank, hit["distance"], hit["text"])
            qr.append(hit)
| 78 |
no test coverage detected