Spaces:
Sleeping
Sleeping
updating code and removing bugs
Browse files
- app.py +1 -1
- inference.py +13 -4
app.py
CHANGED
@@ -7,7 +7,7 @@ inference = Inference()
|
|
7 |
|
8 |
|
9 |
with gr.Blocks() as block:
|
10 |
-
options = gr.Dropdown(choices=["
|
11 |
# need to improve this one...
|
12 |
|
13 |
txt = gr.Textbox(label="Insert a question..", lines=2)
|
|
|
7 |
|
8 |
|
9 |
with gr.Blocks() as block:
|
10 |
+
options = gr.Dropdown(choices=["ViLT", "Blip Saffal", "Blip CN"], label="Models", info="Select the model to use..", )
|
11 |
# need to improve this one...
|
12 |
|
13 |
txt = gr.Textbox(label="Insert a question..", lines=2)
|
inference.py
CHANGED
@@ -9,7 +9,8 @@ class Inference:
|
|
9 |
self.vilt_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
10 |
|
11 |
self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
|
12 |
-
self.
|
|
|
13 |
logging.set_verbosity_info()
|
14 |
self.logger = logging.get_logger("transformers")
|
15 |
|
@@ -18,7 +19,9 @@ class Inference:
|
|
18 |
if selected == "Model 1":
|
19 |
return self.__inference_vilt(image, text)
|
20 |
elif selected == "Model 2":
|
21 |
-
return self.
|
|
|
|
|
22 |
else:
|
23 |
self.logger.warning("Please select a model to make the inference..")
|
24 |
|
@@ -29,8 +32,14 @@ class Inference:
|
|
29 |
idx = logits.argmax(-1).item()
|
30 |
return f"{self.vilt_model.config.id2label[idx]}"
|
31 |
|
32 |
-
def
|
33 |
encoding = self.blip_processor(image, text, return_tensors="pt").to("cuda:0", torch.float16)
|
34 |
-
out = self.
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
generated_text = self.blip_processor.decode(out[0], skip_special_tokens=True)
|
36 |
return f"{generated_text}"
|
|
|
9 |
self.vilt_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
10 |
|
11 |
self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
|
12 |
+
self.blip_model_saffal = BlipForQuestionAnswering.from_pretrained("wiusdy/blip_pretrained_saffal_fashion_finetuning").to("cuda")
|
13 |
+
self.blip_model_control_net = BlipForQuestionAnswering.from_pretrained("wiusdy/blip_pretrained_control_net_fashion_finetuning").to("cuda")
|
14 |
logging.set_verbosity_info()
|
15 |
self.logger = logging.get_logger("transformers")
|
16 |
|
|
|
19 |
if selected == "Model 1":
|
20 |
return self.__inference_vilt(image, text)
|
21 |
elif selected == "Model 2":
|
22 |
+
return self.__inference_saffal_blip(image, text)
|
23 |
+
elif selected == "Model 3":
|
24 |
+
return self.__inference_control_net_blip(image, text)
|
25 |
else:
|
26 |
self.logger.warning("Please select a model to make the inference..")
|
27 |
|
|
|
32 |
idx = logits.argmax(-1).item()
|
33 |
return f"{self.vilt_model.config.id2label[idx]}"
|
34 |
|
35 |
+
def __inference_saffal_blip(self, image, text):
|
36 |
encoding = self.blip_processor(image, text, return_tensors="pt").to("cuda:0", torch.float16)
|
37 |
+
out = self.blip_model_saffal.generate(**encoding)
|
38 |
+
generated_text = self.blip_processor.decode(out[0], skip_special_tokens=True)
|
39 |
+
return f"{generated_text}"
|
40 |
+
|
41 |
+
def __inference_control_net_blip(self, image, text):
|
42 |
+
encoding = self.blip_processor(image, text, return_tensors="pt").to("cuda:0", torch.float16)
|
43 |
+
out = self.blip_model_control_net.generate(**encoding)
|
44 |
generated_text = self.blip_processor.decode(out[0], skip_special_tokens=True)
|
45 |
return f"{generated_text}"
|