Update handler.py
Browse files
- handler.py +14 -1
handler.py
CHANGED
@@ -17,9 +17,22 @@ class EndpointHandler():
|
|
17 |
self.model_name = "sooh-j/blip2-vizwizqa"
|
18 |
# self.base_model = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True)
|
19 |
# self.pipe = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True, torch_dtype=torch.float16)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
self.processor = Blip2Processor.from_pretrained(self.model_name)
|
21 |
self.model = BlipForQuestionAnswering.from_pretrained(self.model_name,
|
22 |
-
device_map="auto",
|
|
|
|
|
23 |
).to(self.device)
|
24 |
# self.model = PeftModel.from_pretrained(self.model_name, self.base_model_name).to(self.device)
|
25 |
|
|
|
17 |
self.model_name = "sooh-j/blip2-vizwizqa"
|
18 |
# self.base_model = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True)
|
19 |
# self.pipe = Blip2ForConditionalGeneration.from_pretrained(self.model_base, load_in_8bit=True, torch_dtype=torch.float16)
|
20 |
+
|
21 |
+
quantization_config = BitsAndBytesConfig(load_in_8bit=True,
|
22 |
+
# llm_int8_threshold=200.0
|
23 |
+
)
|
24 |
+
# model = AutoModelForCausalLM.from_pretrained(
|
25 |
+
# "EleutherAI/gpt-neox-20b",
|
26 |
+
# torch_dtype=torch.float16,
|
27 |
+
# device_map="auto",
|
28 |
+
# quantization_config=quantization_config,
|
29 |
+
|
30 |
+
|
31 |
self.processor = Blip2Processor.from_pretrained(self.model_name)
|
32 |
self.model = BlipForQuestionAnswering.from_pretrained(self.model_name,
|
33 |
+
device_map="auto",
|
34 |
+
# load_in_8bit=True,
|
35 |
+
quantization_config=quantization_config,
|
36 |
).to(self.device)
|
37 |
# self.model = PeftModel.from_pretrained(self.model_name, self.base_model_name).to(self.device)
|
38 |
|