Update handler.py
handler.py (CHANGED, +13 -50)
@@ -1,7 +1,3 @@
-# import sys
-# import base64
-# import logging
-# import copy
 import numpy as np
 from transformers import Blip2Processor, Blip2ForConditionalGeneration, BlipForQuestionAnswering, BitsAndBytesConfig
 from transformers import AutoProcessor, AutoModelForCausalLM
@@ -19,13 +15,7 @@ class EndpointHandler():
 # self.base_model = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True)
 # self.pipe = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True, torch_dtype=torch.float16)
 
-
-# "EleutherAI/gpt-neox-20b",
-# torch_dtype=torch.float16,
-# device_map="auto",
-# quantization_config=quantization_config,
-
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)#, bnb_4bit_compute_dtype=torch.bfloat16)
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
 # self.processor = Blip2Processor.from_pretrained(self.model_name)
 self.processor = AutoProcessor.from_pretrained(self.model_name)
@@ -38,32 +28,6 @@ class EndpointHandler():
 ).to(self.device)
 # self.model = PeftModel.from_pretrained(self.model_name, self.base_model_name).to(self.device)
 
-self.device = "cuda" if torch.cuda.is_available() else "cpu"
-self.model.to(self.device)
-
-# def _generate_answer(
-# self,
-# model_path,
-# prompt,
-# # num_inference_steps=25,
-# # guidance_scale=7.5,
-# # num_images_per_prompt=1
-# ):
-
-# self.pipe.to(self.device)
-
-# # pil_images = self.pipe(
-# # prompt=prompt,
-# # num_inference_steps=num_inference_steps,
-# # guidance_scale=guidance_scale,
-# # num_images_per_prompt=num_images_per_prompt).images
-
-# # np_images = []
-# # for i in range(len(pil_images)):
-# # np_images.append(np.asarray(pil_images[i]))
-
-# return np.stack(np_images, axis=0)
-
 # inputs = data.get("inputs")
 # imageBase64 = inputs.get("image")
 # # imageURL = inputs.get("image")
@@ -96,13 +60,14 @@ class EndpointHandler():
 inputs = data.pop("inputs", data)
 parameters = data.pop("parameters", {})
 
-
-
-
+try:
+    imageBase64 = inputs["image"]
+    # image = Image.open(BytesIO(base64.b64decode(imageBase64.split(",")[1].encode())))
+    image = Image.open(BytesIO(base64.b64decode(imageBase64)))
 
-
-
-
+except:
+    image_url = inputs['image']
+    image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
 
 question = inputs["question"]
 
@@ -112,14 +77,12 @@
 # image = Image.open(requests.get(imageBase64, stream=True).raw)
 # image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
 #### https://huggingface.co/SlowPacer/witron-image-captioning/blob/main/handler.py
-inputs = data.pop("inputs", data)
-parameters = data.pop("parameters", {})
 
-if isinstance(inputs, Image.Image):
-
-else:
-
-
+# if isinstance(inputs, Image.Image):
+# image = [inputs]
+# else:
+# inputs = isinstance(inputs, str) and [inputs] or inputs
+# image = [Image.open(BytesIO(base64.b64decode(_img))) for _img in inputs]
 
 # processed_images = self.processor(images=raw_images, return_tensors="pt")
 # processed_images["pixel_values"] = processed_images["pixel_values"].to(device)
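Note on the 8-bit loading change: the one functional addition in the model-loading code is quantization_config = BitsAndBytesConfig(load_in_8bit=True), alongside the removal of the manual self.model.to(self.device) call. The snippet below is a minimal, hypothetical sketch of how such a config is typically wired into transformers model loading; the checkpoint name is only an example and is not taken from this repository, and the pattern assumes the bitsandbytes and accelerate packages are installed.

from transformers import AutoProcessor, Blip2ForConditionalGeneration, BitsAndBytesConfig

model_name = "Salesforce/blip2-opt-2.7b"  # illustrative checkpoint, not necessarily the one this handler uses

# 8-bit weight loading via bitsandbytes, mirroring the config added in this commit
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

processor = AutoProcessor.from_pretrained(model_name)
model = Blip2ForConditionalGeneration.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # accelerate decides placement of the quantized weights
)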
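Note on the new image handling: the try/except added around reading inputs["image"] first treats the value as a base64-encoded image and falls back to treating it as a URL. Below is a self-contained sketch of that decode path; the helper name, the example URL, and the payload values are illustrative, though the payload keys ("inputs", "image", "question") match the fields the handler reads.

import base64
from io import BytesIO

import requests
from PIL import Image

def load_image(value):
    # Mirror the handler's fallback: try to decode base64 first, then fetch the value as a URL.
    try:
        return Image.open(BytesIO(base64.b64decode(value))).convert("RGB")
    except Exception:
        return Image.open(requests.get(value, stream=True).raw).convert("RGB")

# Example request body in the shape the handler expects: {"inputs": {"image": ..., "question": ...}}
data = {
    "inputs": {
        "image": "http://images.cocodataset.org/val2017/000000039769.jpg",  # or a base64-encoded image string
        "question": "How many cats are in the picture?",
    }
}
image = load_image(data["inputs"]["image"])
print(image.size)

The bare except: in the diff (and the broad Exception here) keeps the fallback simple, but it handles malformed base64 and network errors the same way.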