"""Streamlit app that answers free-text questions about an uploaded PDF.

The text of the PDF is extracted with PyPDF2 and passed as context to a
Hugging Face extractive question-answering pipeline.
"""

import io
# NOTE: os and tempfile are no longer used by the code below (the upload is
# parsed from memory); the imports are kept so nothing else relying on them breaks.
import os
import tempfile

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# Checkpoint used for both the model and its tokenizer.
QA_MODEL_NAME = "distilbert-base-cased-distilled-squad"


@st.cache_resource(show_spinner=False)
def _load_question_answerer():
    """Build the question-answering pipeline once and reuse it across reruns.

    ``st.cache_data`` on ``question_answering`` only caches the answers for a
    given (questions, text) pair; without this resource cache the model would
    be re-instantiated on every cache miss.
    """
    return pipeline(
        "question-answering",
        model=QA_MODEL_NAME,
        tokenizer=QA_MODEL_NAME,
    )


@st.cache_data(show_spinner=False)
def _extract_pdf_text(pdf_bytes):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        The concatenated page texts. Pages for which no text could be
        extracted contribute an empty string.
    """
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # Guard against pages that yield no text so the join never sees None.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


# Function to perform question-answering
@st.cache_data(show_spinner=False)
def question_answering(questions, pdf_text):
    """Answer each question using ``pdf_text`` as the context.

    Args:
        questions: Iterable of non-empty question strings.
        pdf_text: Non-empty context text the answers are extracted from.

    Returns:
        A list with one pipeline result per question, in the same order as
        ``questions``. Each result is indexed by ``'answer'`` and ``'score'``
        by the caller.
    """
    question_answerer = _load_question_answerer()
    return [
        question_answerer(question=question, context=pdf_text)
        for question in questions
    ]


def main():
    """Render the UI: upload a PDF, enter questions, show a table of answers."""
    st.title("Question Answering on PDF Files")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])

    st.write("Enter your question(s) below (separate multiple questions with new lines):")
    raw_questions = st.text_area("Questions")
    # Drop blank lines: an empty text area would otherwise produce [''] and
    # the pipeline rejects empty questions.
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if not st.button("Answer"):
        return

    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return

    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Parse the upload from memory instead of writing it to a shared temp
    # directory under a client-supplied file name (which was never cleaned up).
    pdf_text = _extract_pdf_text(uploaded_file.getvalue())
    if not pdf_text.strip():
        # Scanned/image-only PDFs have no text layer; the pipeline would
        # raise on an empty context.
        st.error("No extractable text was found in the uploaded PDF.")
        return

    answers = question_answering(questions, pdf_text)

    # Display the results as a table with a header row
    table_data = [["Question", "Answer", "Score"]]
    table_data.extend(
        [question, answer["answer"], f"{answer['score']:.2f}"]
        for question, answer in zip(questions, answers)
    )

    st.write("Questions and Answers:")
    st.table(table_data)


if __name__ == "__main__":
    main()