DrishtiSharma committed on
Commit
ccb4e8f
1 Parent(s): fc4cfde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -19,6 +19,12 @@ model_name = "model-q4_K.gguf"
19
 
20
  #snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
21
 
 
 
 
 
 
 
22
 
23
  def get_pdf_text(pdf_docs):
24
  text = ""
@@ -50,6 +56,7 @@ def get_text_chunks(text):
50
 
51
  #return vectorstore
52
 
 
53
  def get_vectorstore(text_chunks, embedding_model_name="intfloat/multilingual-e5-large"):
54
  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
55
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
@@ -78,24 +85,28 @@ def get_conversation_chain(vectorstore, model_name):
78
  return conversation_chain
79
 
80
 
 
 
 
 
 
81
  def handle_userinput(user_question):
82
-
83
  response = st.session_state.conversation({'question': user_question})
84
 
85
  st.session_state.chat_history = response['chat_history']
86
-
87
  st.session_state.retrieved_text = response['source_documents']
88
 
89
  for i, (message, text) in enumerate(zip(st.session_state.chat_history, st.session_state.retrieved_text)):
90
- if i % 3 == 0:
91
- st.write(user_template.replace(
92
- "{{MSG}}", message.content), unsafe_allow_html=True)
93
- else:
94
- st.write(bot_template.replace(
95
- "{{MSG}}", message.content), unsafe_allow_html=True)
96
- print(text)
97
- st.write(bot_template.replace(
98
- "{{MSG}}", str(text.page_content)), unsafe_allow_html=True)
 
99
 
100
 
101
  st.set_page_config(page_title="Chat with multiple PDFs",
@@ -116,6 +127,7 @@ if user_question:
116
  with st.sidebar:
117
  st.subheader("Your documents")
118
  embedding_model_name = st.selectbox("Select embedding model", ["intfloat/multilingual-e5-large", "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"])
 
119
  pdf_docs = st.file_uploader(
120
  "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
121
  if st.button("Process"):
 
19
 
20
  #snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
21
 
22
+ from transformers import pipeline
23
+
24
+ # Initialize the summarization pipeline
25
+ summarizer = pipeline("summarization")
26
+
27
+
28
 
29
  def get_pdf_text(pdf_docs):
30
  text = ""
 
56
 
57
  #return vectorstore
58
 
59
+
60
  def get_vectorstore(text_chunks, embedding_model_name="intfloat/multilingual-e5-large"):
61
  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
62
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
 
85
  return conversation_chain
86
 
87
 
88
+ def summarize_text(text):
89
+ summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
90
+ return summary[0]['summary_text']
91
+
92
+
93
  def handle_userinput(user_question):
 
94
  response = st.session_state.conversation({'question': user_question})
95
 
96
  st.session_state.chat_history = response['chat_history']
 
97
  st.session_state.retrieved_text = response['source_documents']
98
 
99
  for i, (message, text) in enumerate(zip(st.session_state.chat_history, st.session_state.retrieved_text)):
100
+ if i % 2 == 0: # User messages
101
+ st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
102
+ else: # Bot messages
103
+ st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
104
+ if summarize_option and text.page_content: # Check if summarization is enabled
105
+ summarized_text = summarize_text(text.page_content)
106
+ st.write(bot_template.replace("{{MSG}}", summarized_text), unsafe_allow_html=True)
107
+ else:
108
+ st.write(bot_template.replace("{{MSG}}", text.page_content), unsafe_allow_html=True)
109
+
110
 
111
 
112
  st.set_page_config(page_title="Chat with multiple PDFs",
 
127
  with st.sidebar:
128
  st.subheader("Your documents")
129
  embedding_model_name = st.selectbox("Select embedding model", ["intfloat/multilingual-e5-large", "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"])
130
+ summarize_option = st.sidebar.checkbox("Enable Summarization", value=False)
131
  pdf_docs = st.file_uploader(
132
  "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
133
  if st.button("Process"):