wiusdy committed on
Commit
0a5203f
·
1 Parent(s): 17f0aaa

engineering the code

Browse files
Files changed (3) hide show
  1. app.py +13 -11
  2. inference.py +11 -0
  3. utils.py +0 -0
app.py CHANGED
@@ -1,16 +1,8 @@
1
  import gradio as gr
2
  import os
3
- from transformers import ViltProcessor, ViltForQuestionAnswering
4
 
5
- def vqa(image, text):
6
- processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
7
- model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
8
- encoding = processor(image, text, return_tensors="pt")
9
-
10
- outputs = model(**encoding)
11
- logits = outputs.logits
12
- idx = logits.argmax(-1).item()
13
- return f"{model.config.id2label[idx]}"
14
 
15
 
16
  with gr.Blocks() as block:
@@ -22,7 +14,17 @@ with gr.Blocks() as block:
22
 
23
  dogs = os.path.join(os.path.dirname(__file__), "617.jpg")
24
  image = gr.Image(type="pil", value=dogs)
25
- btn.click(vqa, inputs=[image, txt], outputs=[txt_3])
 
 
 
 
 
 
 
 
 
 
26
 
27
  btn = gr.Button(value="Submit")
28
 
 
1
  import gradio as gr
2
  import os
 
3
 
4
+ from inference import *
5
+ from utils import *
 
 
 
 
 
 
 
6
 
7
 
8
  with gr.Blocks() as block:
 
14
 
15
  dogs = os.path.join(os.path.dirname(__file__), "617.jpg")
16
  image = gr.Image(type="pil", value=dogs)
17
+
18
+ selected_option = block.get_element("dropdown").value
19
+ # here we can insert two or more models to inference the data
20
+ if selected_option == "Model 1":
21
+ btn.click(inference, inputs=[image, txt], outputs=[txt_3 + " Model 1"])
22
+
23
+ elif selected_option == "Model 2":
24
+ btn.click(inference, inputs=[image, txt], outputs=[txt_3 + " Model 2"])
25
+
26
+ elif selected_option == "Model 3":
27
+ btn.click(inference, inputs=[image, txt], outputs=[txt_3 + " Model 3"])
28
 
29
  btn = gr.Button(value="Submit")
30
 
inference.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
from transformers import ViltProcessor, ViltForQuestionAnswering

# Model identifier for the ViLT checkpoint fine-tuned on VQAv2.
_MODEL_NAME = "dandelin/vilt-b32-finetuned-vqa"

# Module-level cache: the original re-instantiated the processor and model
# on every call, which re-reads (and on first use re-downloads) the
# pretrained weights per click. Load once, lazily, and reuse.
_processor = None
_model = None


def _load_vqa():
    """Lazily load and memoize the ViLT processor and VQA model."""
    global _processor, _model
    if _model is None:
        _processor = ViltProcessor.from_pretrained(_MODEL_NAME)
        _model = ViltForQuestionAnswering.from_pretrained(_MODEL_NAME)
    return _processor, _model


def inference(image, text):
    """Answer a natural-language question about an image using ViLT VQA.

    Args:
        image: a PIL image (the gradio caller uses ``gr.Image(type="pil")``).
        text: the question string.

    Returns:
        The highest-scoring answer label as a string.
    """
    processor, model = _load_vqa()
    encoding = processor(image, text, return_tensors="pt")

    outputs = model(**encoding)
    logits = outputs.logits
    # argmax over the answer vocabulary; id2label maps index -> answer text.
    idx = logits.argmax(-1).item()
    return f"{model.config.id2label[idx]}"
utils.py ADDED
File without changes