# Web file-viewer header captured with this source (not part of the program):
# barghavani - "Update app.py" - commit d92f5f4 (verified) - 998 Bytes
from pathlib import Path
from typing import Union
from pypdf import PdfReader
from transformers import pipeline
import gradio as gr
# Build the question-answering pipeline once, at import time, so that every
# call to answer_doc_question reuses the same pipeline object.
question_answerer = pipeline("question-answering", model="deepset/roberta-base-squad2")
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF from the given path and return its entire text content.

    Args:
        pdf_file: Location of the PDF document; passed unchanged to
            ``PdfReader``.

    Returns:
        The text extracted from every page, in page order, with a newline
        between consecutive pages. The result is an empty string when the
        document has no pages or none of its pages yields any text.
    """
    reader = PdfReader(pdf_file)
    # Join once instead of growing a string with ``+=`` inside a loop.
    # The newline separator keeps the last word of one page from fusing with
    # the first word of the next, and ``or ""`` keeps the join safe should a
    # page yield no text object at all.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def answer_doc_question(pdf_file, question):
    """Answer a question about the content of an uploaded PDF document.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input; handed
            unchanged to ``get_text_from_pdf``. Falsy (e.g. ``None``) when
            nothing has been uploaded.
        question: The question typed by the user.

    Returns:
        The ``"answer"`` field of the question-answering pipeline's result,
        or a short message telling the user what is missing when there is no
        file, no question, or no extractable text in the document.
    """
    # Reject missing inputs up front so the user gets a readable message
    # rather than a failure from deeper inside the PDF reader or the pipeline.
    if not pdf_file:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please type a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    if not pdf_text.strip():
        # Nothing to search in: the document produced no text at all.
        return "No text could be extracted from the uploaded PDF."

    # Explicit keywords make it unambiguous which string is the question and
    # which is the context being searched.
    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]
# Input widgets: a file upload restricted to ".pdf" files and a text box for
# the user's question.
pdf_input = gr.File(
    file_types=[".pdf"],
    label="Upload a PDF document and ask a question about it.",
)
question = gr.Textbox(
    label="Type a question regarding the uploaded document here.",
)

# Wire both widgets to answer_doc_question, show its return value as text,
# and start the app.
gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
).launch()