savhascelik committed on
Commit
87a496b
·
1 Parent(s): 925e388

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch
3
+ import gradio as gr
4
+ import re
5
+ import fitz
6
+
7
+
8
+
9
# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")

# Tokenizer and seq2seq model are both loaded from the same checkpoint name;
# the model is then moved onto the device selected above.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
model = model.to(device)
13
+
14
class GUI:
    """Answer a question about a PDF using the module-level tokenizer/model."""

    def preprocess(self, text):
        """Collapse every run of whitespace in ``text`` into one space.

        Newlines are whitespace too, so line breaks are flattened. Leading
        and trailing whitespace is reduced to a single space, not stripped.
        """
        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        return re.sub(r'\s+', ' ', text)

    def query_from_list(self, query, options, tok_len):
        """Prompt the model with ``query`` and ``options`` as its context.

        Args:
            query: Question text; it is wrapped in double quotes in the prompt.
            options: Context text inserted after ``Context:`` in the prompt.
            tok_len: Passed to ``model.generate`` as ``max_new_tokens``.

        Returns:
            The result of ``tokenizer.batch_decode`` on the generated ids,
            with special tokens skipped.
        """
        t5query = f"""Question: "{query}" Context: {options}"""
        inputs = tokenizer(t5query, return_tensors="pt").to(device)
        outputs = model.generate(**inputs, max_new_tokens=tok_len)
        return tokenizer.batch_decode(outputs, skip_special_tokens=True)

    def begin(self, pdf, question, start_page=1, end_page=None):
        """Extract text from a page range of ``pdf`` and ask ``question``.

        Args:
            pdf: Whatever ``fitz.open`` accepts as its first argument.
            question: Question forwarded to ``query_from_list``.
            start_page: 1-based first page to read; values below 1 are
                treated as 1.
            end_page: 1-based last page to read (inclusive). ``None`` or a
                value past the end of the document means the last page.

        Returns:
            The value returned by ``query_from_list`` for the extracted text,
            generated with at most 30 new tokens.
        """
        # The context manager closes the document even if extraction fails.
        with fitz.open(pdf) as doc:
            total_pages = doc.page_count
            last_page = total_pages if end_page is None else min(end_page, total_pages)
            # Clamp so a start below 1 cannot turn into a negative page index.
            first_index = max(start_page, 1) - 1
            page_texts = [
                self.preprocess(doc.load_page(index).get_text("text"))
                for index in range(first_index, last_page)
            ]

        # Join once instead of growing a string with += per page.
        pdf_text = "".join(page_texts)
        return self.query_from_list(question, pdf_text, 30)
52
+
53
# Single GUI instance whose ``begin`` method backs the web interface.
app = GUI()

# Text shown on the page: heading, short hint, and a references footer.
title = "Get answers from your document with questions with Flan-T5"
description = "Results will show up in a few seconds."
article = (
    "<b>References</b><br>[1] FLAN-T5” 2022. "
    "<a href='https://huggingface.co/docs/transformers/model_doc/flan-t5'>Transformers Link</a><br>"
)

# Custom stylesheet handed to the interface.
css = """.output_image, .input_image {height: 600px !important}"""

# Two inputs (a PDF upload and a question box) map onto the first two
# parameters of ``app.begin``; its return value goes to one text output.
iface = gr.Interface(
    fn=app.begin,
    inputs=[
        gr.File(label="PDF File", file_types=['.pdf']),
        gr.Textbox(label="Question"),
    ],
    outputs=gr.Text(label="Answer Summary"),
    title=title,
    description=description,
    article=article,
    css=css,
    analytics_enabled=True,
    enable_queue=True,
)

iface.launch(inline=False, share=False, debug=False)