sooh-j committed
Commit 50da7fb
Parent: 90c4e38

Update handler.py

Files changed (1):
  handler.py  +8 -56
handler.py CHANGED
@@ -8,15 +8,13 @@ import requests
 import torch
 from io import BytesIO
 import base64
-
+
 class EndpointHandler():
     def __init__(self, path=""):
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
         print("device:",self.device)
         self.model_base = "Salesforce/blip2-opt-2.7b"
         self.model_name = "sooh-j/blip2-vizwizqa"
-        # self.pipe = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True, torch_dtype=torch.float16)
-
         self.processor = AutoProcessor.from_pretrained(self.model_name)
         self.model = Blip2ForConditionalGeneration.from_pretrained(self.model_name,
                                                                    device_map="auto",
@@ -37,69 +35,22 @@ class EndpointHandler():
         # image: await (await fetch('https://placekitten.com/300/300')).blob()
         # }
         # })
-        ###################
+
         inputs = data.get("inputs")
         imageBase64 = inputs.get("image")
-        # imageURL = inputs.get("image")
         question = inputs.get("question")
-        # print(imageURL)
-        # print(text)
-        # image = Image.open(requests.get(imageBase64, stream=True).raw)
-        import base64
-        from PIL import Image
-        # import matplotlib.pyplot as plt
-        #try2
-        # image = Image.open(BytesIO(base64.b64decode(imageBase64)))
-        #try1
-        image = Image.open(BytesIO(base64.b64decode(imageBase64.split(",")[0].encode())))
-        ###################
-
-        ######################################
-
-        # inputs = data.pop("inputs", data)
-        # parameters = data.pop("parameters", {})
-        # # if isinstance(inputs, Image.Image):
-        # # image = [inputs]
-        # # else:
-        # # try:
-        # # imageBase64 = inputs["image"]
-        # # # image = Image.open(BytesIO(base64.b64decode(imageBase64.split(",")[1].encode())))
-        # # image = Image.open(BytesIO(base64.b64decode(imageBase64)))
-
-        # # except:
-        # image_url = inputs['image']
-        # image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
-
-
-        # question = inputs["question"]
-        ######################################
-        # data = data.pop("inputs", data)
-        # data = data.pop("image", image)
 
+        # imageURL = inputs.get("image")
         # image = Image.open(requests.get(imageBase64, stream=True).raw)
-        # image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
-        #### https://huggingface.co/SlowPacer/witron-image-captioning/blob/main/handler.py
 
-        # if isinstance(inputs, Image.Image):
-        # image = [inputs]
-        # else:
-        # inputs = isinstance(inputs, str) and [inputs] or inputs
-        # image = [Image.open(BytesIO(base64.b64decode(_img))) for _img in inputs]
-
-        # processed_images = self.processor(images=raw_images, return_tensors="pt")
-        # processed_images["pixel_values"] = processed_images["pixel_values"].to(device)
-        # processed_images = {**processed_images, **parameters}
+        if 'http:' in imageBase64:
+            image = Image.open(requests.get(imageBase64, stream=True).raw)
+        else:
+            image = Image.open(BytesIO(base64.b64decode(imageBase64.split(",")[0].encode())))
 
-        ####
-
-
         prompt = f"Question: {question}, Answer:"
         processed = self.processor(images=image, text=prompt, return_tensors="pt").to(self.device)
 
-        # answer = self._generate_answer(
-        # model_path, prompt, image,
-        # )
-
         with torch.no_grad():
             out = self.model.generate(**processed, max_new_tokens=512).to(self.device)
 
@@ -107,4 +58,5 @@ class EndpointHandler():
         text_output = self.processor.decode(out[0], skip_special_tokens=True)
         result["text_output"] = text_output
         score = 0
+
         return [{"answer":text_output,"score":score}]