| 52 | |
| 53 | |
def main():
    """Render the PDF chat page: upload a PDF, index it, answer questions.

    Flow:
      1. Read the uploaded PDF and concatenate the text of all its pages.
      2. Split the text into overlapping chunks.
      3. Store the chunks in the Neo4j vector index ``pdf_bot``; existing
         PDF data is deleted first (``pre_delete_collection=True``).
      4. Build a retrieval chain: the retrieved chunks (formatted by
         ``format_docs``) and the question are fed into the prompt, then
         into ``llm``, and the output is parsed to a string.
      5. When the user enters a question, invoke the chain with a
         ``StreamHandler`` callback bound to an empty Streamlit placeholder.

    Returns early (after a warning) if the PDF yields no extractable text,
    so the existing index is not replaced by an empty one.
    """
    st.header("📄Chat with your pdf file")

    # Upload your PDF file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        # Nothing uploaded yet: only the header and the uploader are shown.
        return

    pdf_reader = PdfReader(pdf)

    # Join once instead of repeated `+=`; `or ""` keeps the join safe should
    # a page yield no text object.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    if not text.strip():
        # Without text there is nothing to index; bail out before the
        # existing collection gets deleted and replaced by an empty one.
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    # Split the text into overlapping chunks (LangChain text splitter)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    chunks = text_splitter.split_text(text=text)

    qa_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "human",
                "Based on the provided summary: {summaries} \n Answer the following question:{question}",
            )
        ]
    )

    # Store the chunks in the vector db.
    # NOTE(review): Streamlit re-runs the script on widget interaction, so
    # this re-indexing presumably happens for every question as well —
    # consider caching per uploaded file.
    vectorstore = Neo4jVector.from_texts(
        chunks,
        url=url,
        username=username,
        password=password,
        embedding=embeddings,
        index_name="pdf_bot",
        node_label="PdfBotChunk",
        pre_delete_collection=True,  # Delete existing PDF data
    )

    # The raw query goes both to the retriever (top 2 chunks, formatted by
    # `format_docs`) and, unchanged, into the prompt's `question` slot.
    qa = (
        RunnableParallel(
            {
                "summaries": vectorstore.as_retriever(search_kwargs={"k": 2})
                | format_docs,
                "question": RunnablePassthrough(),
            }
        )
        | qa_prompt
        | llm
        | StrOutputParser()
    )

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file")

    if query:
        # The return value is not used here; output is expected to reach the
        # page through the callback handler.
        stream_handler = StreamHandler(st.empty())
        qa.invoke(query, {"callbacks": [stream_handler]})