# Spaces: Sleeping  (status text captured from the hosting page; not part of the program)
from pathlib import Path | |
from typing import Union | |
from pypdf import PdfReader | |
from transformers import pipeline | |
import gradio as gr | |
# Extractive question-answering pipeline, built once when the module loads
# and reused by answer_doc_question for every request.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF at the given path and return the text of all its pages.

    Args:
        pdf_file: Location of the PDF; handed unchanged to ``PdfReader``.

    Returns:
        The extracted text of every page, concatenated in page order with no
        separator inserted between pages.
    """
    reader = PdfReader(pdf_file)
    # Build the result with a single join rather than repeated ``+=``.
    # ``or ""`` guards against a page whose extraction yields nothing
    # (e.g. ``None``), which would otherwise raise a TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
def answer_doc_question(pdf_file, question):
    """Answer a question using the text of an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF, either as a path or as an upload object
            that exposes its path through a string ``name`` attribute;
            ``None`` when nothing was uploaded.
        question: The question to ask about the document.

    Returns:
        The answer text chosen by the question-answering pipeline, or a short
        explanatory message when the inputs cannot be processed.
    """
    # Validate the inputs first so no PDF parsing or model inference is
    # attempted for a request that cannot succeed.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please type a question about the document."

    # NOTE(review): depending on the Gradio version/configuration the upload
    # may arrive as a temp-file wrapper instead of a path; in that case use
    # the path it carries in ``name``. Confirm against the deployed version.
    if not isinstance(pdf_file, (str, Path)):
        upload_path = getattr(pdf_file, "name", None)
        if isinstance(upload_path, str):
            pdf_file = upload_path

    pdf_text = get_text_from_pdf(pdf_file)
    # A PDF without extractable text (e.g. scanned images) would hand the
    # model an empty context, so report it instead of failing.
    if not pdf_text.strip():
        return "No readable text was found in this PDF."

    # Keyword arguments are the documented calling convention for the
    # question-answering pipeline.
    result = question_answerer(question=question, context=pdf_text)
    return result["answer"]
# Input widgets: an upload field restricted to .pdf files and a text box
# for the question.
pdf_input = gr.File(
    label="Upload a PDF document and ask a question about it.",
    file_types=[".pdf"],
)
question = gr.Textbox(
    label="Type a question regarding the uploaded document here.",
)

# Wire both inputs to answer_doc_question, render its result as text, and
# start the app.
gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
).launch()