MinxuanQin committed
Commit d43497c
1 Parent(s): 2e4b982
fix model load error
Files changed:
- app.py (+2 -2)
- model_loader.py (+15 -1)
app.py
CHANGED
@@ -17,7 +17,7 @@ df = pd.read_json('vqa_samples.json', orient="columns")
 # define selector
 model_name = st.sidebar.selectbox(
     "Select a model: ",
-    ('vilt', 'git', 'blip', 'vbert')
+    ('vilt', 'vilt_finetuned', 'git', 'blip', 'vbert')
 )
 
 image_selector_unspecific = st.number_input(
@@ -41,4 +41,4 @@ question = st.text_input(f"Ask the model a question related to the image: \n"
 args = load_model(model_name) # TODO: cache
 answer = get_answer(args, image, question, model_name)
 st.text(f"Answer by {model_name}: {answer}")
-st.text(f"Ground truth: {label}")
+st.text(f"Ground truth (of the example): {label}")
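The `# TODO: cache` note above refers to app.py re-loading the selected model on every Streamlit rerun. A minimal sketch of one way to handle it, assuming Streamlit 1.18+ and a hypothetical wrapper `load_model_cached` that is not part of this commit:

import streamlit as st
from model_loader import load_model  # loader defined in this repo

@st.cache_resource  # keep one loaded model per selected name across reruns
def load_model_cached(name: str):
    # Delegate to the existing load_model; the decorator memoizes its return value.
    return load_model(name)

model_name = st.sidebar.selectbox(
    "Select a model: ",
    ('vilt', 'vilt_finetuned', 'git', 'blip', 'vbert')
)
args = load_model_cached(model_name)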
model_loader.py
CHANGED
@@ -33,7 +33,10 @@ VQA_URL = "https://dl.fbaipublicfiles.com/pythia/data/answers_vqa.txt"
 def load_model(name):
     if name == "vilt":
         processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-        model = ViltForQuestionAnswering.from_pretrained("
+        model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+    elif name == "vilt_finetuned":
+        processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+        model = ViltForQuestionAnswering.from_pretrained("Minqin/carets_vqa_finetuned")
     elif name == "git":
         processor = AutoProcessor.from_pretrained("microsoft/git-base-vqav2")
         model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-vqav2")
@@ -155,6 +158,17 @@ def get_answer(model_loader_args, img, question, model_name):
             logits = outputs.logits
             idx = logits.argmax(-1).item()
             pred = model.config.id2label[idx]
+
+    elif model_name == "vilt_finetuned":
+        try:
+            encoding = processor(images=img, text=question, return_tensors="pt")
+        except Exception:
+            return err_msg()
+        else:
+            outputs = model(**encoding)
+            logits = outputs.logits
+            idx = logits.argmax(-1).item()
+            pred = model.config.id2label[idx]
 
     elif model_name == "git":
         try:
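For reference, a minimal sketch of how the new `vilt_finetuned` branch could be exercised outside the Streamlit app, assuming a hypothetical local image `cat.jpg` and that `load_model`'s return value is passed straight through to `get_answer`, as app.py does:

from PIL import Image
from model_loader import load_model, get_answer

args = load_model("vilt_finetuned")   # ViLT processor + checkpoint Minqin/carets_vqa_finetuned
image = Image.open("cat.jpg")         # hypothetical example image
question = "What animal is in the picture?"
print(get_answer(args, image, question, "vilt_finetuned"))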