stefanbschneider's picture
Create simple PDF qna interface
0545b56 verified
raw
history blame
No virus
942 Bytes
from pathlib import Path
from typing import Union
from pypdf import PdfReader
from transformers import pipeline
import gradio as gr
# Build the question-answering pipeline once at import time so that every
# request handled by the app reuses the same loaded model.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/tinyroberta-squad2",
)
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF from the given path and return the text of all its pages.

    Args:
        pdf_file: Path to the PDF document to read.

    Returns:
        The extracted text of every page, in page order, with consecutive
        pages separated by a newline. A page that yields no text contributes
        an empty string.
    """
    reader = PdfReader(pdf_file)
    # Join once instead of growing a string page by page. The newline keeps
    # the last word of one page from fusing with the first word of the next.
    # `or ""` is defensive: tolerate a page whose extraction yields nothing.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def answer_doc_question(pdf_file, question):
    """Answer a question about the uploaded PDF document.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input, or None
            when nothing has been uploaded yet.
        question: The user's question about the document.

    Returns:
        The answer text extracted from the document, or a short explanatory
        message when the file is missing, the question is blank, or the PDF
        contains no extractable text.
    """
    # Guard the inputs up front: without these checks a missing upload or an
    # empty question only surfaces as an opaque exception from the PDF reader
    # or the model pipeline.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please enter a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    # The pipeline needs a non-empty context to search for an answer in.
    if not pdf_text.strip():
        return "No text could be extracted from the uploaded PDF."

    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]
# Input widgets: a PDF-only file upload and a free-text question box.
pdf_input = gr.File(file_types=[".pdf"])
question = gr.Textbox(label="Type a question regarding the uploaded document here.")

# Wire the widgets to the answering function and start the web interface.
demo = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)
demo.launch()