sooh-j
/

blip2-vizwizqa

Visual Question Answering

Inference Endpoints

Model card | Files and versions | Community

sooh-j committed on Jun 4, 2024

Commit

a6def05

·

verified ·

1 Parent(s): 4ce7ea7

Update handler.py

Files changed (1) hide show

handler.py +14 -1

handler.py CHANGED Viewed

@@ -17,9 +17,22 @@ class EndpointHandler():
         self.model_name = "sooh-j/blip2-vizwizqa"
         # self.base_model = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True)
         # self.pipe = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True, torch_dtype=torch.float16)
         self.processor = Blip2Processor.from_pretrained(self.model_name)
         self.model = BlipForQuestionAnswering.from_pretrained(self.model_name,
-                                                              device_map="auto", load_in_8bit=True
                                                              ).to(self.device)
         # self.model = PeftModel.from_pretrained(self.model_name, self.base_model_name).to(self.device)

         self.model_name = "sooh-j/blip2-vizwizqa"
         # self.base_model = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True)
         # self.pipe = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True, torch_dtype=torch.float16)
+        quantization_config = BitsAndBytesConfig(load_in_8bit=True,
+                                         # llm_int8_threshold=200.0
+                                                )
+        # model = AutoModelForCausalLM.from_pretrained(
+        #     "EleutherAI/gpt-neox-20b",
+        #     torch_dtype=torch.float16,
+        #     device_map="auto",
+        #     quantization_config=quantization_config,
         self.processor = Blip2Processor.from_pretrained(self.model_name)
         self.model = BlipForQuestionAnswering.from_pretrained(self.model_name,
+                                                              device_map="auto",
+                                                              # load_in_8bit=True,
+                                                              quantization_config=quantization_config,
                                                              ).to(self.device)
         # self.model = PeftModel.from_pretrained(self.model_name, self.base_model_name).to(self.device)