Spaces: Sleeping
Commit: "engineering the code" — Browse files
Files changed:
- app.py (+13, −11)
- inference.py (+11, −0)
- utils.py (+0, −0)
app.py
CHANGED
@@ -1,16 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
-
from transformers import ViltProcessor, ViltForQuestionAnswering
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
8 |
-
encoding = processor(image, text, return_tensors="pt")
|
9 |
-
|
10 |
-
outputs = model(**encoding)
|
11 |
-
logits = outputs.logits
|
12 |
-
idx = logits.argmax(-1).item()
|
13 |
-
return f"{model.config.id2label[idx]}"
|
14 |
|
15 |
|
16 |
with gr.Blocks() as block:
|
@@ -22,7 +14,17 @@ with gr.Blocks() as block:
|
|
22 |
|
23 |
dogs = os.path.join(os.path.dirname(__file__), "617.jpg")
|
24 |
image = gr.Image(type="pil", value=dogs)
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
btn = gr.Button(value="Submit")
|
28 |
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
|
|
3 |
|
4 |
+
from inference import *
|
5 |
+
from utils import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
with gr.Blocks() as block:
|
|
|
14 |
|
15 |
dogs = os.path.join(os.path.dirname(__file__), "617.jpg")
|
16 |
image = gr.Image(type="pil", value=dogs)
|
17 |
+
|
18 |
+
selected_option = block.get_element("dropdown").value
|
19 |
+
# here we can insert two or more models to inference the data
|
20 |
+
if selected_option == "Model 1":
|
21 |
+
btn.click(inference, inputs=[image, txt], outputs=[txt_3 + " Model 1"])
|
22 |
+
|
23 |
+
elif selected_option == "Model 2":
|
24 |
+
btn.click(inference, inputs=[image, txt], outputs=[txt_3 + " Model 2"])
|
25 |
+
|
26 |
+
elif selected_option == "Model 3":
|
27 |
+
btn.click(inference, inputs=[image, txt], outputs=[txt_3 + " Model 3"])
|
28 |
|
29 |
btn = gr.Button(value="Submit")
|
30 |
|
inference.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from functools import lru_cache

from transformers import ViltProcessor, ViltForQuestionAnswering

# Pretrained ViLT checkpoint fine-tuned for visual question answering.
_MODEL_NAME = "dandelin/vilt-b32-finetuned-vqa"


@lru_cache(maxsize=1)
def _load_vqa():
    """Load and cache the ViLT VQA processor and model.

    The checkpoint is hundreds of MB; downloading/initialising it on every
    request made the original implementation pay that cost per call.  The
    cache is lazy, so importing this module stays side-effect free.
    """
    processor = ViltProcessor.from_pretrained(_MODEL_NAME)
    model = ViltForQuestionAnswering.from_pretrained(_MODEL_NAME)
    return processor, model


def inference(image, text):
    """Answer a natural-language question about an image with ViLT VQA.

    Parameters
    ----------
    image :
        Input image; presumably a ``PIL.Image.Image`` (gradio's
        ``gr.Image(type="pil")`` supplies one) — TODO confirm against caller.
    text : str
        The question to ask about the image.

    Returns
    -------
    str
        The highest-scoring answer label from the model's vocabulary.
    """
    processor, model = _load_vqa()
    # Encode the (image, question) pair into PyTorch model inputs.
    encoding = processor(image, text, return_tensors="pt")
    outputs = model(**encoding)
    logits = outputs.logits
    # argmax over the answer vocabulary, mapped back to its label string.
    idx = logits.argmax(-1).item()
    return f"{model.config.id2label[idx]}"
utils.py
ADDED
File without changes
|