File size: 998 Bytes
80a01f0
 
24555e8
80a01f0
 
 
24555e8
 
d92f5f4
60b1010
9b01105
80a01f0
 
 
6fabaae
80a01f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from pathlib import Path
from typing import Union

from pypdf import PdfReader
from transformers import pipeline
import gradio as gr


# Extractive question-answering pipeline, built once at import time and
# reused for every request handled by the app.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)


def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF at the given path and return the text of all its pages.

    Args:
        pdf_file: Path of the PDF document to read.

    Returns:
        The text extracted from every page, concatenated in page order with
        no separator inserted between pages. Empty if no page yields text.
    """
    reader = PdfReader(pdf_file)

    # Join once instead of growing a string with ``+=`` per page. The
    # ``or ""`` keeps the join safe should a page yield no text at all.
    return "".join(page.extract_text() or "" for page in reader.pages)


def answer_doc_question(pdf_file, question):
    """Answer a question using the text of an uploaded PDF document.

    Args:
        pdf_file: The uploaded PDF as handed over by the file input; passed
            on unchanged to ``get_text_from_pdf``. ``None`` when nothing
            has been uploaded.
        question: The question to answer from the document's text.

    Returns:
        The ``"answer"`` field of the question-answering pipeline's result,
        or a short explanatory message when the inputs are unusable
        (no file, blank question, or a PDF without extractable text).
    """
    # Guard clauses: return a readable hint for the UI instead of letting
    # the PDF reader or the pipeline fail on missing/empty input.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please type a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    if not pdf_text.strip():
        # e.g. image-only PDFs; the pipeline cannot work with an empty context.
        return "No text could be extracted from this PDF."

    result = question_answerer(question=question, context=pdf_text)
    return result["answer"]


# Input widgets: a PDF-only file upload and a free-text question box.
pdf_input = gr.File(
    file_types=[".pdf"],
    label="Upload a PDF document and ask a question about it.",
)
question = gr.Textbox(
    label="Type a question regarding the uploaded document here.",
)

# Wire both inputs to the answering function, show the result as plain
# text, and start the app.
demo = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)
demo.launch()