"""Gradio demo: upload a PDF document and ask a question about its content."""

from pathlib import Path
from typing import Union

import gradio as gr
from pypdf import PdfReader
from transformers import pipeline

# Built once at import time so every request reuses the same pipeline object.
question_answerer = pipeline("question-answering", model="deepset/roberta-base-squad2")


def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF from the given path and return a string with its entire content.

    Args:
        pdf_file: Location of the PDF document, handed straight to ``PdfReader``.

    Returns:
        The text extracted from every page, in page order, with a newline
        between consecutive pages.
    """
    reader = PdfReader(pdf_file)
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next. ``or ""`` guards against a page that yields no
    # text at all.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def answer_doc_question(pdf_file, question):
    """Answer ``question`` using the text of the uploaded PDF.

    Args:
        pdf_file: Value delivered by the ``gr.File`` input; ``None`` when
            nothing has been uploaded.
        question: Question text typed by the user.

    Returns:
        The ``"answer"`` field of the question-answering pipeline's result.

    Raises:
        gr.Error: If no file was uploaded, the question is blank, or no text
            could be extracted from the PDF.
    """
    # Validate up front: without these guards the failure surfaces as an
    # opaque internal exception instead of a message the user can act on.
    if pdf_file is None:
        raise gr.Error("Please upload a PDF document first.")
    if not question or not question.strip():
        raise gr.Error("Please type a question about the document.")

    pdf_text = get_text_from_pdf(pdf_file)
    if not pdf_text.strip():
        # E.g. a scanned document that contains only images.
        raise gr.Error("No text could be extracted from the uploaded PDF.")

    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]


pdf_input = gr.File(file_types=[".pdf"], label="Upload a PDF document and ask a question about it.")
question = gr.Textbox(label="Type a question regarding the uploaded document here.")

# Build the two-input / text-output interface and start serving it.
gr.Interface(fn=answer_doc_question, inputs=[pdf_input, question], outputs="text").launch()