Spaces:

ivyblossom
/

question-answering

Running

App Files Community

ivyblossom committed on Aug 4, 2023

Commit

9988861

•

1 Parent(s): 663bca5

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -24

app.py CHANGED Viewed

@@ -1,15 +1,8 @@
 import os
 import streamlit as st
 from transformers import pipeline
-import re
 from PyPDF2 import PdfReader
-# Function to truncate text to the nearest word boundary
-def truncate_to_word_boundary(text, max_words=100):
-    words = re.findall(r'\w+', text)
-    truncated_text = ' '.join(words[:max_words])
-    return truncated_text
 # Function to perform question-answering
 def question_answering(question, pdf_path):
     pdf_reader = PdfReader(pdf_path)
@@ -25,27 +18,20 @@ def question_answering(question, pdf_path):
     question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
     answer = question_answerer(question=question, context=pdf_text)
-    return answer, pdf_text_with_pages
-def get_context_text(pdf_text_with_pages, context_page_num, context_window=3):
-    context_start = max(0, context_page_num - context_window - 1)
-    context_end = min(len(pdf_text_with_pages), context_page_num + context_window)
-    context_lines = [text for _, text in pdf_text_with_pages[context_start:context_end]]
-    context_text = "\n".join(context_lines)
-    return context_text
 def main():
     st.title("Question Answering on a PDF File")
     uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
     question = st.text_input("Ask your question:")
     if st.button("Answer") and uploaded_file is not None:
         pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
         with open(pdf_path, "wb") as f:
             f.write(uploaded_file.read())
-        answer, pdf_text_with_pages = question_answering(question, pdf_path)
         # Delete the uploaded file after processing
         os.remove(pdf_path)
@@ -54,11 +40,5 @@ def main():
         st.write("Answer:", answer['answer'])
         st.write("Score:", answer['score'])
-        # Display context where the answer came from
-        context_page_num = answer['start']
-        context_text = get_context_text(pdf_text_with_pages, context_page_num)
-        st.write("Context:")
-        st.write(context_text)
 if __name__ == "__main__":
-    main()

 import os
 import streamlit as st
 from transformers import pipeline
 from PyPDF2 import PdfReader
 # Function to perform question-answering
 def question_answering(question, pdf_path):
     pdf_reader = PdfReader(pdf_path)
     question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
     answer = question_answerer(question=question, context=pdf_text)
+    return answer
 def main():
     st.title("Question Answering on a PDF File")
     uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
     question = st.text_input("Ask your question:")
     if st.button("Answer") and uploaded_file is not None:
         pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
         with open(pdf_path, "wb") as f:
             f.write(uploaded_file.read())
+        answer = question_answering(question, pdf_path)
         # Delete the uploaded file after processing
         os.remove(pdf_path)
         st.write("Answer:", answer['answer'])
         st.write("Score:", answer['score'])
 if __name__ == "__main__":
+    main()