Spaces:
Running
Running
File size: 1,817 Bytes
4688ae4 e4a1f31 e776a42 23c47e2 4688ae4 e4a1f31 7b208e8 ed5b1fa e776a42 c7c4e48 e4a1f31 12978ef 663bca5 ed5b1fa 4688ae4 272eebb 4688ae4 12978ef e776a42 011e1bd 75c6f14 ed5b1fa 3dd11fc 92bd63a 75c6f14 7b208e8 ed5b1fa 6d20f5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import os
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import tempfile
# Build the question-answering pipeline once and share it across reruns.
@st.cache_resource(show_spinner=False)
def _load_question_answerer():
    """Return the Hugging Face question-answering pipeline used by the app.

    Wrapped in ``st.cache_resource`` so the pipeline object is constructed
    only once instead of on every cache miss of ``question_answering``.
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        tokenizer="distilbert-base-cased-distilled-squad",
    )


# Function to perform question-answering
@st.cache_data(show_spinner=False)
def question_answering(questions, pdf_text):
    """Answer each question against the extracted PDF text.

    Args:
        questions: Iterable of question strings; one pipeline call is made
            per entry, in order.
        pdf_text: Text used as the ``context`` for every question.

    Returns:
        A list with one pipeline result per question, in the same order as
        ``questions``, so callers can ``zip`` the two together.

    Results are memoised by ``st.cache_data`` on ``(questions, pdf_text)``.
    """
    # Reuse the shared pipeline rather than re-instantiating the model here.
    question_answerer = _load_question_answerer()
    return [
        question_answerer(question=question, context=pdf_text)
        for question in questions
    ]
def main():
    """Render the Streamlit page: upload a PDF, ask questions, show answers.

    The page shows a file uploader, a multi-line question box and an
    "Answer" button. When the button is pressed with a PDF uploaded and at
    least one non-blank question entered, the PDF text is extracted, passed
    to ``question_answering`` and the results are shown as a table of
    question, answer and score.
    """
    st.title("Question Answering on PDF Files")
    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    st.write("Enter your question(s) below (separate multiple questions with new lines):")

    # Drop blank lines: an empty question would make the pipeline raise.
    raw_questions = st.text_area("Questions")
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if not st.button("Answer"):
        return
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Read straight from the uploaded stream instead of writing a copy to the
    # shared temp directory, which was never cleaned up and could collide on
    # the user-supplied file name.
    pdf_reader = PdfReader(uploaded_file)
    # `or ""` guards against pages that yield no text.
    pdf_text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
    if not pdf_text.strip():
        st.error("No extractable text was found in this PDF.")
        return

    # Perform question-answering for all questions against the same text
    answers = question_answering(questions, pdf_text)

    # Display the results as a table with a header row
    table_data = [["Question", "Answer", "Score"]]
    for question, answer in zip(questions, answers):
        table_data.append([question, answer['answer'], f"{answer['score']:.2f}"])
    st.write("Questions and Answers:")
    st.table(table_data)
# Launch the app when this file is executed as a script.
if __name__ == "__main__":
    main()