lvkaokao committed • Commit 653f44e • Parent: dca5dbd

support fp32/fp16/bf16 eval.

Files changed:
- app.py (+1/-1)
- src/display/utils.py (+18/-4)
- src/leaderboard/read_evals.py (+2/-2)
- src/submission/check_validity.py (+21/-3)
- src/submission/submit.py (+32/-4)
app.py CHANGED

```diff
@@ -572,7 +572,7 @@ with demo:
             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)",
                                                  visible=not IS_PUBLIC)
             compute_type = gr.Dropdown(
-                choices=[i.value.name for i in ComputeDtype],
+                choices=[i.value.name for i in ComputeDtype if i.value.name != "All"],
                 label="Compute dtype",
                 multiselect=False,
                 value="float16",
```
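The app.py change keeps the "All" sentinel, which exists only as a leaderboard filter value, out of the submission form's dtype dropdown. A minimal, self-contained sketch of the filtering comprehension, with ModelDetails and the enum members trimmed down from src/display/utils.py:

```python
from dataclasses import dataclass
from enum import Enum


@dataclass
class ModelDetails:
    name: str


# Trimmed stand-in for the leaderboard's ComputeDtype enum.
class ComputeDtype(Enum):
    all = ModelDetails("All")  # aggregate filter value, not a real dtype
    fp16 = ModelDetails("float16")
    bf16 = ModelDetails("bfloat16")
    fp32 = ModelDetails("float32")


# The dropdown should only offer dtypes a job can actually run with,
# so the "All" sentinel is excluded from the choices.
choices = [i.value.name for i in ComputeDtype if i.value.name != "All"]
assert choices == ["float16", "bfloat16", "float32"]
```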
src/display/utils.py CHANGED

```diff
@@ -242,6 +242,9 @@ class WeightDtype(Enum):
     int4 = ModelDetails("int4")
     nf4 = ModelDetails("nf4")
     fp4 = ModelDetails("fp4")
+    fp16 = ModelDetails("float16")
+    bf16 = ModelDetails("bfloat16")
+    fp32 = ModelDetails("float32")
 
     Unknown = ModelDetails("?")
 
@@ -260,6 +263,12 @@ class WeightDtype(Enum):
             return WeightDtype.fp4
         if weight_dtype in ["All"]:
             return WeightDtype.all
+        if weight_dtype in ["float16"]:
+            return WeightDtype.fp16
+        if weight_dtype in ["bfloat16"]:
+            return WeightDtype.bf16
+        if weight_dtype in ["float32"]:
+            return WeightDtype.fp32
         return WeightDtype.Unknown
 
 class ComputeDtype(Enum):
@@ -317,8 +326,9 @@ class Precision(Enum):
     qt_2bit = ModelDetails("2bit")
     qt_3bit = ModelDetails("3bit")
     qt_4bit = ModelDetails("4bit")
-
-
+    qt_8bit = ModelDetails("8bit")
+    qt_16bit = ModelDetails("16bit")
+    qt_32bit = ModelDetails("32bit")
     Unknown = ModelDetails("?")
 
     def from_str(precision):
@@ -332,8 +342,12 @@ class Precision(Enum):
             return Precision.qt_3bit
         if precision in ["4bit"]:
             return Precision.qt_4bit
-
-
+        if precision in ["8bit"]:
+            return Precision.qt_8bit
+        if precision in ["16bit"]:
+            return Precision.qt_16bit
+        if precision in ["32bit"]:
+            return Precision.qt_32bit
         return Precision.Unknown
 
 
```
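Taken together, these edits let unquantized checkpoints round-trip through the same string dispatch as quantized ones: new WeightDtype members for float16/bfloat16/float32, and new Precision members for 8bit/16bit/32bit. A condensed sketch of the pattern; the real classes carry more members and use explicit if-chains exactly as in the diff:

```python
from dataclasses import dataclass
from enum import Enum


@dataclass
class ModelDetails:
    name: str


class WeightDtype(Enum):
    int4 = ModelDetails("int4")
    fp16 = ModelDetails("float16")
    bf16 = ModelDetails("bfloat16")
    fp32 = ModelDetails("float32")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(weight_dtype: str) -> "WeightDtype":
        # Same dispatch as the if-chains above: match on the display
        # name, fall through to Unknown for anything unrecognized.
        for member in WeightDtype:
            if member.value.name == weight_dtype:
                return member
        return WeightDtype.Unknown


assert WeightDtype.from_str("bfloat16") is WeightDtype.bf16
assert WeightDtype.from_str("int8") is WeightDtype.Unknown
```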
src/leaderboard/read_evals.py CHANGED

```diff
@@ -56,7 +56,7 @@ class EvalResult:
 
         # Precision
         precision = Precision.from_str(config.get("precision", "4bit"))
-        quant_type = QuantType.from_str(config.get("quant_type", "GPTQ"))
+        quant_type = QuantType.from_str(str(config.get("quant_type", "GPTQ")))
         weight_dtype = WeightDtype.from_str(data["task_info"].get("weight_dtype", "int4"))
         compute_dtype = ComputeDtype.from_str(data["task_info"].get("compute_dtype", "bfloat16"))
         # double_quant = data["quantization_config"].get("bnb_4bit_use_double_quant", False)
@@ -209,7 +209,7 @@ def get_request_file_for_model(requests_path, model_name,
         if (
             req_content["status"] in ["Finished"]
             and req_content["precision"] == precision.split(".")[-1]
-            and req_content["quant_type"] == quant_type
+            and str(req_content["quant_type"]) == quant_type
             and req_content["weight_dtype"] == weight_dtype.split(".")[-1]
             and req_content["compute_dtype"] == compute_dtype.split(".")[-1]
         ):
```
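Both read_evals.py edits coerce quant_type through str() before using it. Presumably this is because, after this commit, fp16/bf16/fp32 submissions can carry quant_type as null in the request JSON, which loads as Python None; a small illustration of the mismatch the coercion avoids (field names follow the diff, the values are hypothetical):

```python
import json

# A request file for an unquantized (bf16) submission: quant_type is null.
req_content = json.loads('{"status": "Finished", "quant_type": null}')

# On the EvalResult side the value has already been stringified, so a
# plain comparison of None against "None" would never match.
quant_type = "None"
assert req_content["quant_type"] != quant_type       # None != "None"
assert str(req_content["quant_type"]) == quant_type  # "None" == "None"
```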
src/submission/check_validity.py CHANGED

```diff
@@ -69,13 +69,27 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
         return True, "uses a gated model.", None
     return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None
 
+
 def get_model_size(model_info: ModelInfo, precision: str):
     size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
     safetensors = None
     try:
         safetensors = get_safetensors_metadata(model_info.id)
+        num_parameters = 0
+        mem = 0
+        for key in safetensors.parameter_count:
+            if key in ["F16", "BF16"]:
+                mem += safetensors.parameter_count[key] * 2
+            else:
+                mem += safetensors.parameter_count[key] * 4
+
+            num_parameters += safetensors.parameter_count[key]
+
+        params_b = round(num_parameters / 1e9, 2)
+        size_gb = round(mem / 1e9, 2)
+        return params_b, size_gb
     except Exception as e:
-        print(e)
+        print(str(e))
 
     if safetensors is not None:
         model_size = round(sum(safetensors.parameter_count.values()) / 1e9, 3)
@@ -87,9 +101,13 @@ def get_model_size(model_info: ModelInfo, precision: str):
     except AttributeError as e:
         return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
 
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
+    # size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
     # model_size = size_factor * model_size
-
+    if precision == "16bit":
+        size_gb = model_size * 2
+    else:
+        size_gb = model_size * 4
+    return model_size, size_gb
 
 KNOWN_SIZE_FACTOR = {
     "gptq": {"4bit": 8, "8bit": 4, "2bit": 8, "3bit": 12},
```
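The new fast path in get_model_size derives both the parameter count and a memory footprint from safetensors metadata, charging 2 bytes per parameter for 16-bit dtypes and 4 bytes for everything else. A standalone rerun of that arithmetic, assuming a parameter_count mapping shaped like the one huggingface_hub.get_safetensors_metadata returns (dtype name to parameter count); the checkpoint figures here are made up:

```python
# e.g. a hypothetical ~7B bfloat16 checkpoint
parameter_count = {"BF16": 7_241_732_096}

num_parameters = 0
mem = 0
for key, count in parameter_count.items():
    # 16-bit dtypes take 2 bytes per parameter; everything else is
    # treated as 4 bytes (float32) by this heuristic.
    mem += count * (2 if key in ["F16", "BF16"] else 4)
    num_parameters += count

params_b = round(num_parameters / 1e9, 2)  # parameters, in billions
size_gb = round(mem / 1e9, 2)              # checkpoint size, in GB
print(params_b, size_gb)                   # 7.24 14.48
```

The fallback path after the try block mirrors the same rule at coarser granularity: once precision is known to be "16bit", the size is model_size * 2, otherwise * 4.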
src/submission/submit.py CHANGED

```diff
@@ -157,11 +157,36 @@ def add_new_eval(
         weight_dtype = "int2"
 
     if quant_type is None or quant_type == "":
-        return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
-
-    model_params, model_size = get_quantized_model_parameters_memory(model_info,
+        # return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
+        # for eval fp32/fp16/bf16
+        quant_type = None
+
+        if quant_type is None:
+            weight_dtype = str(getattr(model_config, "torch_dtype", "float16"))
+            if weight_dtype in ["torch.float16", "float16"]:
+                weight_dtype = "float16"
+                precision = "16bit"
+            elif weight_dtype in ["torch.bfloat16", "bfloat16"]:
+                weight_dtype = "bfloat16"
+                precision = "16bit"
+            elif weight_dtype in ["torch.float32", "float32"]:
+                weight_dtype = "float32"
+                precision = "32bit"
+            else:
+                weight_dtype = "?"
+                precision = "?"
+            model_type = "original"
+            model_params, model_size = get_model_size(model_info=model_info, precision=precision)
+        else:
+            model_params, model_size = get_quantized_model_parameters_memory(model_info,
                                                                      quant_method=quant_type.lower(),
                                                                      bits=precision)
+            model_type = "quantization"
+    else:
+        model_params, model_size = get_quantized_model_parameters_memory(model_info,
+                                                                         quant_method=quant_type.lower(),
+                                                                         bits=precision)
+        model_type = "quantization"
 
     if quant_type == "llama.cpp":
         hardware = "cpu"
@@ -170,6 +195,9 @@ def add_new_eval(
     else:
         hardware = "gpu"
 
+    if compute_dtype == "?":
+        compute_dtype = "float16"
+
     eval_entry = {
         "model": model,
         "revision": revision,
@@ -187,7 +215,7 @@ def add_new_eval(
         "hardware": hardware,
         "status": "Pending",
         "submitted_time": current_time,
-        "model_type":
+        "model_type": model_type,
         "job_id": -1,
         "job_start_time": None,
         "scripts": script
```
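The core of the submit.py change is mapping a model config's torch_dtype onto the (weight_dtype, precision) pair the leaderboard tracks, instead of rejecting submissions without a quant_type. A sketch of that mapping as a standalone function; detect_dtype and FakeConfig are illustrative names, and in the real flow model_config would come from the model's loaded configuration (e.g. transformers.AutoConfig):

```python
# torch_dtype string -> (weight_dtype, precision), per the diff above.
DTYPE_MAP = {
    "torch.float16": ("float16", "16bit"),
    "float16": ("float16", "16bit"),
    "torch.bfloat16": ("bfloat16", "16bit"),
    "bfloat16": ("bfloat16", "16bit"),
    "torch.float32": ("float32", "32bit"),
    "float32": ("float32", "32bit"),
}


def detect_dtype(model_config) -> tuple[str, str]:
    # str() is needed because torch_dtype may be a torch.dtype object
    # (which stringifies as "torch.float16") or a plain string; anything
    # unrecognized falls back to ("?", "?"), as in the diff.
    raw = str(getattr(model_config, "torch_dtype", "float16"))
    return DTYPE_MAP.get(raw, ("?", "?"))


class FakeConfig:
    torch_dtype = "bfloat16"


assert detect_dtype(FakeConfig()) == ("bfloat16", "16bit")
```

Submissions taking this path are tagged model_type = "original" and sized via get_model_size, while anything with a real quant_type keeps the get_quantized_model_parameters_memory path and the "quantization" tag.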