stefanbschneider committed on
Commit
0545b56
1 Parent(s): 3dda550

Create simple PDF qna interface

Browse files
Files changed (1) hide show
  1. app.py +31 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import Union
3
+
4
+ from pypdf import PdfReader
5
+ from transformers import pipeline
6
+ import gradio as gr
7
+
8
+
9
# Question-answering pipeline, created once at module load time.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/tinyroberta-squad2",
)
10
+
11
+
12
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF at the given path and return the text of all its pages.

    Args:
        pdf_file: Path to the PDF file to read.

    Returns:
        The extracted text of every page, in page order, joined by newlines.
    """
    reader = PdfReader(pdf_file)

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page; ``str.join`` also avoids building the
    # result through repeated string concatenation in a loop.
    return "\n".join(page.extract_text() for page in reader.pages)
21
+
22
+
23
def answer_doc_question(pdf_file, question):
    """Answer a question about the content of an uploaded PDF document.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input; it is
            passed on unchanged to ``get_text_from_pdf``. ``None`` is treated
            as "nothing uploaded yet".
        question: The question to answer from the document's text.

    Returns:
        The ``"answer"`` entry of the question-answering pipeline's result,
        or a short hint for the user when the PDF or the question is missing
        or when the PDF yields no text.
    """
    # Guard clauses: report missing input to the user instead of letting the
    # PDF reader or the pipeline fail with an unhandled exception.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please type a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    # E.g. a PDF that contains only scanned images has no text to search.
    if not pdf_text.strip():
        return "No text could be extracted from the uploaded PDF."

    # Keyword arguments make the question/context roles explicit.
    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]
27
+
28
+
29
# Input widgets: a file upload restricted to ".pdf" and a question text box.
pdf_input = gr.File(file_types=[".pdf"])
question = gr.Textbox(
    label="Type a question regarding the uploaded document here.",
)

# Wire the widgets to the answering function and start the web app.
demo = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)
demo.launch()