Shaltiel committed on
Commit
d47ac37
1 Parent(s): 5f1bc85

added support for 8bit, 4bit, GPTQ

Files changed (1):
  src/backend/manage_requests.py  +5 -5
src/backend/manage_requests.py CHANGED
@@ -29,11 +29,11 @@ class EvalRequest:
         if self.precision in ["float16", "bfloat16", "float32"]:
             model_args += f",dtype={self.precision}"
         # Quantized models need some added config, the install of bits and bytes, etc
-        #elif self.precision == "8bit":
-        #    model_args += ",load_in_8bit=True"
-        #elif self.precision == "4bit":
-        #    model_args += ",load_in_4bit=True"
-        #elif self.precision == "GPTQ":
+        elif self.precision == "8bit":
+            model_args += ",load_in_8bit=True"
+        elif self.precision == "4bit":
+            model_args += ",load_in_4bit=True"
+        elif self.precision == "GPTQ":
             # A GPTQ model does not need dtype to be specified,
             # it will be inferred from the config
             pass
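
For context, a minimal sketch of how the new branches compose the comma-separated model_args string. This is an illustration only, not the repo's actual file: the simplified EvalRequest fields (model, precision), the get_model_args helper, and the pretrained= prefix are assumptions beyond what the diff shows.

from dataclasses import dataclass

@dataclass
class EvalRequest:
    model: str
    precision: str  # e.g. "float16", "8bit", "4bit", "GPTQ"

    def get_model_args(self) -> str:
        # Hypothetical helper: builds a harness-style ",key=value" argument string.
        model_args = f"pretrained={self.model}"
        if self.precision in ["float16", "bfloat16", "float32"]:
            model_args += f",dtype={self.precision}"
        elif self.precision == "8bit":
            model_args += ",load_in_8bit=True"   # requires bitsandbytes at load time
        elif self.precision == "4bit":
            model_args += ",load_in_4bit=True"   # requires bitsandbytes at load time
        elif self.precision == "GPTQ":
            # dtype is inferred from the model's quantization config, so nothing is appended
            pass
        return model_args

Example: EvalRequest("org/model", "8bit").get_model_args() would return "pretrained=org/model,load_in_8bit=True", which is the shape of string the uncommented branches now produce.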