File size: 1,817 Bytes
4688ae4
e4a1f31
e776a42
23c47e2
4688ae4
e4a1f31
7b208e8
ed5b1fa
e776a42
 
 
c7c4e48
 
 
 
 
e4a1f31
12978ef
663bca5
ed5b1fa
 
 
 
 
 
 
 
 
4688ae4
 
 
272eebb
4688ae4
 
12978ef
 
e776a42
 
011e1bd
75c6f14
 
ed5b1fa
3dd11fc
92bd63a
75c6f14
 
7b208e8
ed5b1fa
6d20f5a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import tempfile

@st.cache_resource(show_spinner=False)
def _load_question_answerer():
    """Build the extractive question-answering pipeline once and reuse it.

    Model loading is the expensive step, so it is cached as a shared resource
    instead of being repeated on every cache miss of ``question_answering``.
    """
    model_name = "distilbert-base-cased-distilled-squad"
    return pipeline("question-answering", model=model_name, tokenizer=model_name)


@st.cache_data(show_spinner=False)
def question_answering(questions, pdf_text):
    """Answer each question against the extracted PDF text.

    Args:
        questions: Iterable of question strings.
        pdf_text: Text used as the context for every question.

    Returns:
        A list with one pipeline result per question, in the same order as
        ``questions``. The caller in this file reads the ``'answer'`` and
        ``'score'`` keys of each result.
    """
    # Nothing to answer: skip loading the model altogether.
    if not questions:
        return []

    question_answerer = _load_question_answerer()
    return [
        question_answerer(question=question, context=pdf_text)
        for question in questions
    ]

def main():
    """Render the Streamlit page: upload a PDF, ask questions, show answers.

    The user uploads a PDF and types one question per line. When the
    "Answer" button is pressed, the PDF text is extracted, each non-blank
    question is answered against it, and the results are shown as a table.
    Missing inputs (no file, no questions, no extractable text) produce a
    warning instead of an exception or a silent no-op.
    """
    import io  # local import: only needed to wrap the uploaded bytes

    st.title("Question Answering on PDF Files")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])

    st.write("Enter your question(s) below (separate multiple questions with new lines):")
    raw_questions = st.text_area("Questions")
    # Drop blank lines (including the single "" produced by an empty text
    # area); the QA pipeline rejects empty questions.
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if not st.button("Answer"):
        return

    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Parse the upload from memory rather than writing it to a predictable
    # path in the shared temp directory: no leftover files, no collisions
    # between sessions, and no dependence on the stream's read position.
    pdf_reader = PdfReader(io.BytesIO(uploaded_file.getvalue()))
    # extract_text() may yield nothing for image-only pages; treat as empty.
    pdf_text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    if not pdf_text.strip():
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    answers = question_answering(questions, pdf_text)

    # Display the results as a table with a header row
    table_data = [["Question", "Answer", "Score"]]
    table_data.extend(
        [question, answer['answer'], f"{answer['score']:.2f}"]
        for question, answer in zip(questions, answers)
    )

    st.write("Questions and Answers:")
    st.table(table_data)

# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()