Function main

pdf_bot.py:54–111 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

52
53
def main():
    """Render the PDF chat page and answer a question about an uploaded PDF.

    Flow, top to bottom:

    1. Show the page header and a PDF upload widget.
    2. Extract the text of every page of the uploaded PDF and split it into
       overlapping chunks (1000 characters, 200 overlap).
    3. Store the chunks in a Neo4j vector index (``pdf_bot`` /
       ``PdfBotChunk``), deleting whatever the index held before.
    4. Build a retrieval chain: the two most similar chunks are formatted
       and injected into the prompt together with the user's question.
    5. Read a question from a text input and run the chain with a
       ``StreamHandler`` callback bound to an empty placeholder.

    Relies on names defined outside this function: ``st``, ``PdfReader``,
    ``RecursiveCharacterTextSplitter``, ``ChatPromptTemplate``,
    ``Neo4jVector``, ``RunnableParallel``, ``RunnablePassthrough``,
    ``StrOutputParser``, ``StreamHandler``, ``format_docs``, ``llm``,
    ``embeddings``, ``url``, ``username`` and ``password``.

    Returns:
        None. Returns early when no file has been uploaded or when the PDF
        yields no text chunks.
    """
    st.header("📄Chat with your pdf file")

    # Upload your PDF file.
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        # Nothing uploaded yet: leave only the header and the uploader.
        return

    pdf_reader = PdfReader(pdf)

    # Join once instead of growing a string with += per page. The `or ""`
    # keeps a page without text from breaking the join should the extractor
    # hand back None for it (not verifiable from this file; defensive).
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # LangChain text splitter.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Without chunks there is nothing to index or retrieve. Stop before
        # `pre_delete_collection=True` below wipes the existing index.
        st.warning("No extractable text was found in this PDF.")
        return

    qa_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "human",
                "Based on the provided summary: {summaries} \n Answer the following question:{question}",
            )
        ]
    )

    # Store the chunks in the vector database.
    vectorstore = Neo4jVector.from_texts(
        chunks,
        url=url,
        username=username,
        password=password,
        embedding=embeddings,
        index_name="pdf_bot",
        node_label="PdfBotChunk",
        pre_delete_collection=True,  # Delete existing PDF data
    )

    # The raw query feeds both branches: the retriever (top 2 chunks, then
    # formatted by `format_docs`) and the pass-through "question" slot.
    qa = (
        RunnableParallel(
            {
                "summaries": vectorstore.as_retriever(search_kwargs={"k": 2})
                | format_docs,
                "question": RunnablePassthrough(),
            }
        )
        | qa_prompt
        | llm
        | StrOutputParser()
    )

    # Accept user questions/query.
    query = st.text_input("Ask questions about your PDF file")

    if query:
        # The chain's return value is discarded; output is presumably
        # rendered by the StreamHandler callback into the placeholder
        # (StreamHandler is defined elsewhere — confirm there).
        stream_handler = StreamHandler(st.empty())
        qa.invoke(query, {"callbacks": [stream_handler]})

pdf_bot.pyFile · 0.85

StreamHandlerClass · 0.70

no test coverage detected