itsme-nishanth committed on
Commit 7eb9150
Parent: 7e5fea1

Create app.py

adding first version

Files changed (1)
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
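+ # app.py: chat with an uploaded PDF through a LangChain RAG pipeline.
+ # Flow: PyPDFLoader -> RecursiveCharacterTextSplitter -> HuggingFace embeddings
+ # -> Chroma vector store -> HuggingFaceEndpoint LLM, all served via Streamlit.
+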
+ import os
+
+ import streamlit as st
+ from langchain_community.llms import HuggingFaceEndpoint
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain.prompts import ChatPromptTemplate
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_chroma import Chroma
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.document_loaders import PyPDFLoader
+
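+ # PyPDFLoader reads from disk, so each uploaded file is first written to a temp path.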
+ def get_pdf(pdf_docs):
+     docs = []
+     for pdf in pdf_docs:
+         temp_file = "./temp.pdf"
+         # Delete the existing temp.pdf file if it exists
+         if os.path.exists(temp_file):
+             os.remove(temp_file)
+         with open(temp_file, "wb") as file:
+             file.write(pdf.getvalue())
+         loader = PyPDFLoader(temp_file)
+         docs.extend(loader.load())
+     return docs
+
+ def text_splitter(docs):
+     # Large chunks (10,000 chars) with a 500-char overlap keep related passages together.
+     splitter = RecursiveCharacterTextSplitter(
+         chunk_size=10000,
+         chunk_overlap=500,
+         separators=["\n\n", "\n", " ", ".", ","])
+     chunks = splitter.split_documents(docs)
+     return chunks
+
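+ # The chain below uses the LCEL pipe style: the retriever fills {context},
+ # RunnablePassthrough forwards the user's question unchanged, and
+ # StrOutputParser converts the model output to plain text.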
+ def get_conversational_chain(retriever):
+     prompt_template = """
+     Given the following extracted parts of a long document and a question, create a final answer.
+     Answer the question in as much detail as possible from the provided context, making sure to include all relevant details. If the answer is not in the
+     provided context, just say "answer is not available in the context", then ignore the context and answer from your own knowledge, like a plain LLM prompt.
+     Try to give at least the basic information. Do not return a blank answer.\n\n
+     Make sure to understand the question and answer it as asked.
+     The answer should be detailed and should incorporate examples for better understanding.
+     If the question involves terms like "detailed" or "explained", give an answer that covers the topic in complete detail.\n\n
+     Context:\n {context}\n
+     Question: \n{question}\n
+     Answer:
+     """
+     # Generator LLM, served remotely through the Hugging Face Inference API
+     # (requires HUGGINGFACEHUB_API_TOKEN to be set in the environment).
+     llm = HuggingFaceEndpoint(repo_id="nvidia/Llama3-ChatQA-1.5-8B")
+
+     pt = ChatPromptTemplate.from_template(prompt_template)
+     rag_chain = (
+         {"context": retriever, "question": RunnablePassthrough()}
+         | pt
+         | llm
+         | StrOutputParser()
+     )
+     return rag_chain
+
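+ # Embeds the chunks into a fresh in-memory Chroma index on every query, then
+ # answers with the RAG chain (HuggingFaceEmbeddings defaults to a
+ # sentence-transformers model).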
+ def embedding(chunks, query):
+     embeddings = HuggingFaceEmbeddings()
+     db = Chroma.from_documents(chunks, embeddings)
+     chain = get_conversational_chain(db.as_retriever())
+     response = chain.invoke(query)
+     return response
+
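+ # Streamlit re-runs the whole script on each interaction, so the chat history
+ # is kept in st.session_state.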
+ if 'messages' not in st.session_state:
+     st.session_state.messages = [{'role': 'assistant', 'content': 'Hello! Upload a PDF and ask me questions.'}]
+
+ st.header("Chat with your PDF")
+ with st.sidebar:
+     st.title("PDF FILE UPLOAD:")
+     pdf_docs = st.file_uploader("Upload your PDF file(s)", accept_multiple_files=True, key="pdf_uploader")
+
+ query = st.chat_input("Ask a question about the PDF file")
+ if query:
+     st.session_state.messages.append({'role': 'user', 'content': query})
+     if pdf_docs:
+         raw_docs = get_pdf(pdf_docs)
+         text_chunks = text_splitter(raw_docs)
+         response = embedding(text_chunks, query)
+     else:
+         # Chroma.from_documents fails on an empty document list, so guard against it.
+         response = 'Please upload a PDF file first.'
+     st.session_state.messages.append({'role': 'assistant', 'content': response})
+
+ for message in st.session_state.messages:
+     with st.chat_message(message['role']):
+         st.write(message['content'])