Spaces:

ivyblossom
/

question-answering

Running

File size: 1,817 Bytes

4688ae4
e4a1f31
e776a42
23c47e2
4688ae4
e4a1f31
7b208e8
ed5b1fa
e776a42
 
 
c7c4e48
 
 
 
 
e4a1f31
12978ef
663bca5
ed5b1fa
 
 
 
 
 
 
 
 
4688ae4
 
 
272eebb
4688ae4
 
12978ef
 
e776a42
 
011e1bd
75c6f14
 
ed5b1fa
3dd11fc
92bd63a
75c6f14
 
7b208e8
ed5b1fa
6d20f5a

import os
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import tempfile

@st.cache_resource(show_spinner=False)
def _load_question_answerer():
    """Build the extractive question-answering pipeline once and reuse it.

    Kept separate from ``question_answering`` so that the model is not
    re-instantiated every time a new question or document misses the data
    cache.
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        tokenizer="distilbert-base-cased-distilled-squad",
    )


@st.cache_data(show_spinner=False)
def question_answering(questions, pdf_text):
    """Answer each question against the text extracted from a PDF.

    Args:
        questions: Iterable of question strings.
        pdf_text: The document text used as the context for every question.

    Returns:
        A list with one result dict per entry of ``questions``, in the same
        order. Each dict has at least the ``"answer"`` and ``"score"`` keys.
        Blank questions, or any question asked against a blank context, get a
        placeholder with an empty answer and a score of 0.0 instead of being
        sent to the model.
    """
    has_context = bool(pdf_text and pdf_text.strip())
    # Only pay for fetching the model when there is something to ask it.
    question_answerer = _load_question_answerer() if has_context else None

    answers = []
    for question in questions:
        if not has_context or not question or not question.strip():
            # The pipeline rejects empty questions/contexts with a ValueError;
            # emit a placeholder so results stay aligned with ``questions``.
            answers.append({"score": 0.0, "start": 0, "end": 0, "answer": ""})
            continue
        answers.append(question_answerer(question=question, context=pdf_text))

    return answers

def main():
    """Render the Streamlit page: upload a PDF, enter questions, show answers.

    The questions text area is read one question per line; blank lines are
    ignored. When the "Answer" button is pressed, the uploaded PDF's text is
    extracted, every question is answered against it, and the results are
    shown as a table with Question / Answer / Score columns. A warning is
    shown instead when the upload, the questions, or the extractable text is
    missing.
    """
    st.title("Question Answering on PDF Files")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])

    st.write("Enter your question(s) below (separate multiple questions with new lines):")
    raw_questions = st.text_area("Questions")
    # Drop blank lines: an empty text area would otherwise produce [''] and
    # send an empty question to the model.
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if not st.button("Answer"):
        return
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Parse the upload directly from memory rather than writing it to the
    # shared temp directory under a user-controlled file name (which could
    # collide with other files and was never cleaned up).
    pdf_reader = PdfReader(uploaded_file)
    # Pages without a text layer may yield nothing; treat that as empty text.
    pdf_text = "\n".join(pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages)
    if not pdf_text.strip():
        st.warning("No extractable text was found in this PDF.")
        return

    answers = question_answering(questions, pdf_text)

    # Rows are dicts so the keys render as real column headers rather than
    # as an ordinary first data row.
    table_rows = [
        {
            "Question": question,
            "Answer": answer['answer'],
            "Score": f"{answer['score']:.2f}",
        }
        for question, answer in zip(questions, answers)
    ]

    st.write("Questions and Answers:")
    st.table(table_rows)

# Build the page only when this file is executed as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()