adi-123 committed on
Commit
0106e5c
1 Parent(s): 0d916e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -17
app.py CHANGED
@@ -8,7 +8,6 @@ from langchain.prompts import PromptTemplate
8
  from dotenv import load_dotenv
9
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
10
  from langchain import HuggingFaceHub
11
- from dotenv import load_dotenv
12
 
13
  def get_pdf_text(pdf_docs):
14
  """Extracts text from all pages of provided PDF documents"""
@@ -19,25 +18,17 @@ def get_pdf_text(pdf_docs):
19
  text += page.extract_text()
20
  return text
21
 
22
-
23
  def get_text_chunks(text):
24
  """Splits text into chunks of 10,000 characters with 1,000 character overlap"""
25
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
26
  chunks = text_splitter.split_text(text)
27
  return chunks
28
 
29
-
30
- def get_vector_store(text_chunks):
31
  """Creates and saves a FAISS vector store from text chunks"""
32
- model_name = "BAAI/bge-large-en"
33
- model_kwargs = {'device': 'cpu'}
34
- encode_kwargs = {'normalize_embeddings': True}
35
- hf = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)
36
-
37
  vector_store = FAISS.from_texts(text_chunks, embedding=hf)
38
  vector_store.save_local("faiss_index")
39
 
40
-
41
  def get_conversational_chain():
42
  """Creates and returns a conversational chain for question answering"""
43
  prompt_template = """Answer the question concisely, focusing on the most relevant and important details from the PDF context. Refrain from mentioning any mathematical equations, even if they are present in provided context. Focus on the textual information available. Please provide direct quotations or references from PDF to back up your response. If the answer is not found within the PDF, please state "answer is not available in the context."\n\nContext:\n {context}?\nQuestion: \n{question}\nExample response format:Overview: (brief summary or introduction)Key points: (point 1: paragraph for key details)(point 2: paragraph for key details)...Use a mix of paragraphs and points to effectively convey the information."""
@@ -48,8 +39,7 @@ def get_conversational_chain():
48
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
49
  return chain
50
 
51
-
52
- def user_input(user_question):
53
  """Processes user question and provides a response"""
54
  try:
55
  new_db = FAISS.load_local("faiss_index", hf, allow_dangerous_deserialization=True)
@@ -65,7 +55,6 @@ def user_input(user_question):
65
  )
66
  st.write("Reply: ", response["output_text"], "")
67
 
68
-
69
  def main():
70
  """Streamlit UI"""
71
  st.set_page_config(page_title="Chat with PDFs", page_icon="")
@@ -73,8 +62,6 @@ def main():
73
  st.header("RAG based Chatbot ")
74
 
75
  user_question = st.text_input("Ask a Question from PDF file(s)")
76
- if user_question:
77
- user_input(user_question)
78
 
79
  with st.sidebar:
80
  st.title("Menu ✨")
@@ -85,10 +72,17 @@ def main():
85
  try:
86
  raw_text = get_pdf_text(pdf_docs)
87
  text_chunks = get_text_chunks(raw_text)
88
- get_vector_store(text_chunks)
 
 
 
 
89
  st.success("Done ✨")
90
  except Exception as e:
91
- st.error(f"An error occurred: {e}")
 
 
 
92
 
93
  if __name__ == "__main__":
94
  main()
 
8
  from dotenv import load_dotenv
9
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
10
  from langchain import HuggingFaceHub
 
11
 
12
  def get_pdf_text(pdf_docs):
13
  """Extracts text from all pages of provided PDF documents"""
 
18
  text += page.extract_text()
19
  return text
20
 
 
21
  def get_text_chunks(text):
22
  """Splits text into chunks of 10,000 characters with 1,000 character overlap"""
23
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
24
  chunks = text_splitter.split_text(text)
25
  return chunks
26
 
27
+ def get_vector_store(text_chunks, hf):
 
28
  """Creates and saves a FAISS vector store from text chunks"""
 
 
 
 
 
29
  vector_store = FAISS.from_texts(text_chunks, embedding=hf)
30
  vector_store.save_local("faiss_index")
31
 
 
32
  def get_conversational_chain():
33
  """Creates and returns a conversational chain for question answering"""
34
  prompt_template = """Answer the question concisely, focusing on the most relevant and important details from the PDF context. Refrain from mentioning any mathematical equations, even if they are present in provided context. Focus on the textual information available. Please provide direct quotations or references from PDF to back up your response. If the answer is not found within the PDF, please state "answer is not available in the context."\n\nContext:\n {context}?\nQuestion: \n{question}\nExample response format:Overview: (brief summary or introduction)Key points: (point 1: paragraph for key details)(point 2: paragraph for key details)...Use a mix of paragraphs and points to effectively convey the information."""
 
39
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
40
  return chain
41
 
42
+ def user_input(user_question, hf):
 
43
  """Processes user question and provides a response"""
44
  try:
45
  new_db = FAISS.load_local("faiss_index", hf, allow_dangerous_deserialization=True)
 
55
  )
56
  st.write("Reply: ", response["output_text"], "")
57
 
 
58
  def main():
59
  """Streamlit UI"""
60
  st.set_page_config(page_title="Chat with PDFs", page_icon="")
 
62
  st.header("RAG based Chatbot ")
63
 
64
  user_question = st.text_input("Ask a Question from PDF file(s)")
 
 
65
 
66
  with st.sidebar:
67
  st.title("Menu ✨")
 
72
  try:
73
  raw_text = get_pdf_text(pdf_docs)
74
  text_chunks = get_text_chunks(raw_text)
75
+ model_name = "BAAI/bge-large-en"
76
+ model_kwargs = {'device': 'cpu'}
77
+ encode_kwargs = {'normalize_embeddings': True}
78
+ hf = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)
79
+ get_vector_store(text_chunks, hf)
80
  st.success("Done ✨")
81
  except Exception as e:
82
+ st.error(f"An error occurred: {e}")
83
+
84
+ if user_question:
85
+ user_input(user_question, hf)
86
 
87
  if __name__ == "__main__":
88
  main()