Spaces:
Paused
Paused
Added support for 8bit, 4bit, GPTQ
Browse files
src/backend/manage_requests.py
CHANGED
@@ -29,11 +29,11 @@ class EvalRequest:
|
|
29 |
if self.precision in ["float16", "bfloat16", "float32"]:
|
30 |
model_args += f",dtype={self.precision}"
|
31 |
# Quantized models need some added config, the install of bits and bytes, etc
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
# A GPTQ model does not need dtype to be specified,
|
38 |
# it will be inferred from the config
|
39 |
pass
|
|
|
29 |
if self.precision in ["float16", "bfloat16", "float32"]:
|
30 |
model_args += f",dtype={self.precision}"
|
31 |
# Quantized models need some added config, the install of bits and bytes, etc
|
32 |
+
elif self.precision == "8bit":
|
33 |
+
model_args += ",load_in_8bit=True"
|
34 |
+
elif self.precision == "4bit":
|
35 |
+
model_args += ",load_in_4bit=True"
|
36 |
+
elif self.precision == "GPTQ":
|
37 |
# A GPTQ model does not need dtype to be specified,
|
38 |
# it will be inferred from the config
|
39 |
pass
|