TusharGoel committed on
Commit
8bd3a78
·
1 Parent(s): 20d1199

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, LayoutLMv3ImageProcessor
3
+
4
# Hugging Face Hub checkpoint used for both the tokenizer and the QA model.
model_name = "TusharGoel/LiLT-Document-QA"

# NOTE: ``apply_ocr`` is an image-processor option with no documented effect
# on the tokenizer, so it is set on the image processor instead of being
# passed to ``AutoTokenizer.from_pretrained``.
tokenizer = AutoTokenizer.from_pretrained(model_name)
image_processor = LayoutLMv3ImageProcessor(apply_ocr=True)

model = AutoModelForQuestionAnswering.from_pretrained(model_name)
# The app only runs inference, so switch off training-time behaviour.
model.eval()
10
+
11
def qna(image, question):
    """Answer a natural-language question about a document image.

    The image is run through the module-level ``image_processor`` with OCR
    enabled, the question and the OCR words/boxes are encoded with
    ``tokenizer``, and ``model`` scores every token as a possible start and
    end of the answer. The best-scoring start/end tokens are mapped back to
    OCR words, which are joined with spaces.

    Args:
        image: Document image as delivered by the Gradio image input;
            ``None`` when nothing was uploaded.
        question: Question text entered by the user.

    Returns:
        The answer as a space-separated string of OCR words. An empty string
        is returned when there is no image, the question is blank, OCR finds
        no words, or the predicted span does not lie inside the document
        text (for example it points at the question, a special token or
        padding, or ends before it starts).
    """
    if image is None or not question or not question.strip():
        return ""

    ocr = image_processor(image, apply_ocr=True)
    words = ocr["words"][0]
    boxes = ocr["boxes"][0]
    if not words:
        # Nothing was recognised on the page, so there is nothing to extract.
        return ""

    encoding = tokenizer(
        question,
        words,
        boxes=boxes,
        return_token_type_ids=True,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
    )

    # Per-token bookkeeping for the single example in the batch:
    # ``word_ids`` gives the index of the source word (None for special and
    # padding tokens) and ``sequence_ids`` tells whether a token belongs to
    # the question (0) or to the document words (1).
    word_ids = encoding.word_ids(0)
    sequence_ids = encoding.sequence_ids(0)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encoding)

    start_token = outputs.start_logits.argmax(-1).item()
    end_token = outputs.end_logits.argmax(-1).item()

    # Word indices restart at 0 for each sequence, so a span predicted inside
    # the question would otherwise be silently mapped onto unrelated document
    # words. Only accept spans whose both ends fall in the document.
    if sequence_ids[start_token] != 1 or sequence_ids[end_token] != 1:
        return ""

    start_word = word_ids[start_token]
    end_word = word_ids[end_token]
    if start_word is None or end_word is None or end_word < start_word:
        return ""

    return " ".join(words[start_word : end_word + 1])
33
+
34
+
35
# Input widgets: the document image to read and the question to ask about it.
question = gr.Text(label="Question")
img = gr.Image(source="upload", label="Image")

# Wire the widgets to ``qna`` and start serving the demo.
iface = gr.Interface(
    fn=qna,
    inputs=[img, question],
    outputs="text",
    title="LiLT - Document Question Answering",
)
iface.launch()