Spaces:
Sleeping
Sleeping
File size: 998 Bytes
80a01f0 24555e8 80a01f0 24555e8 d92f5f4 60b1010 9b01105 80a01f0 6fabaae 80a01f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from pathlib import Path
from typing import Union
from pypdf import PdfReader
from transformers import pipeline
import gradio as gr
# Extractive question-answering pipeline, built once at import time so every
# request reuses the same loaded model.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read a PDF and return the text of all its pages as one string.

    Args:
        pdf_file: Location of the PDF; handed to ``PdfReader`` unchanged.

    Returns:
        The extracted text of every page, concatenated in page order with
        no separator inserted between pages.
    """
    reader = PdfReader(pdf_file)
    # Join once instead of growing a string with ``+=`` for every page.
    return "".join(page.extract_text() for page in reader.pages)
def answer_doc_question(pdf_file, question):
    """Answer a question about a PDF document with the QA pipeline.

    Args:
        pdf_file: The PDF to read; passed unchanged to ``get_text_from_pdf``.
            ``None`` is treated as "no document provided".
        question: The question to ask about the document's text.

    Returns:
        The ``"answer"`` entry of the pipeline result, or a short
        explanatory message when the document, the question, or the
        document's extractable text is missing.
    """
    # Validate the inputs up front so neither the PDF reader nor the model
    # is invoked with nothing to work on.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please type a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    # A PDF can yield no text at all (e.g. image-only pages); there is then
    # no context for the model to search.
    if not pdf_text.strip():
        return "No extractable text was found in the uploaded PDF."

    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]
# Input widgets: a file upload restricted to PDFs and a free-text question box.
pdf_input = gr.File(file_types=[".pdf"], label="Upload a PDF document and ask a question about it.")
question = gr.Textbox(label="Type a question regarding the uploaded document here.")

# Wire the widgets to answer_doc_question and start serving the app.
# The interface is bound to a name so it can be referenced after creation.
demo = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)
demo.launch()