MinxuanQin committed on
Commit
c40a6be
1 Parent(s): 520d399

update img loading

Browse files
Files changed (2) hide show
  1. app.py +7 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import numpy as np
2
  from PIL import Image
3
  from transformers import ViltConfig, ViltProcessor, ViltForQuestionAnswering
4
-
5
  import streamlit as st
6
 
7
  st.title("Live demo of multimodal vqa")
@@ -16,10 +16,13 @@ uploaded_file = st.file_uploader("Please upload one image (jpg)", type="jpg")
16
  question = st.text_input("Type here one question on the image")
17
  if uploaded_file is not None:
18
  file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
19
- img = Image.fromarray(file_bytes)
20
- # st.image(img, caption="Here is the uploaded image", use_column_width=True)
 
 
 
21
 
22
- encoding = processor(images=file_bytes, text=question, return_tensors="pt")
23
 
24
  outputs = model(**encoding)
25
  logits = outputs.logits
 
1
  import numpy as np
2
  from PIL import Image
3
  from transformers import ViltConfig, ViltProcessor, ViltForQuestionAnswering
4
+ import cv2
5
  import streamlit as st
6
 
7
  st.title("Live demo of multimodal vqa")
 
16
  question = st.text_input("Type here one question on the image")
17
  if uploaded_file is not None:
18
  file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
19
+ opencv_img = cv2.imdecode(file_bytes, 1)
20
+ image_cv2 = cv2.cvtColor(opencv_img, cv2.COLOR_BGR2RGB)
21
+ st.image(image_cv2, channels="RGB")
22
+
23
+ img = Image.fromarray(image_cv2)
24
 
25
+ encoding = processor(images=img, text=question, return_tensors="pt")
26
 
27
  outputs = model(**encoding)
28
  logits = outputs.logits
requirements.txt CHANGED
@@ -9,4 +9,5 @@ pandas
9
  pyarrow==10
10
  nltk
11
  torchsummary
12
- matplotlib
 
 
9
  pyarrow==10
10
  nltk
11
  torchsummary
12
+ matplotlib
13
+ opencv-python