lvkaokao committed on
Commit
cf7af95
·
1 Parent(s): a16a56e

update model size.

Browse files
src/submission/check_validity.py CHANGED
@@ -90,6 +90,59 @@ def get_model_size(model_info: ModelInfo, precision: str):
90
  # model_size = size_factor * model_size
91
  return model_size
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def get_model_arch(model_info: ModelInfo):
94
  return model_info.config.get("architectures", "Unknown")
95
 
 
90
  # model_size = size_factor * model_size
91
  return model_size
92
 
93
+ KNOWN_SIZE_FACTOR = {
94
+ "gptq": {"4bit": 8, "8bit": 4},
95
+ "awq": {"4bit": 8},
96
+ "bitsandbytes": {"4bit": 2}
97
+ }
98
+
99
+ BYTES = {
100
+ "I32": 4,
101
+ "F16": 2,
102
+ "BF16": 2,
103
+ "F32": 4,
104
+ "U8": 1}
105
+
106
+ def get_quantized_model_parameters_memory(model_info: ModelInfo, quant_method="", bits="4bit"):
107
+ try:
108
+ safetensors = get_safetensors_metadata(model_info.id)
109
+ num_parameters = 0
110
+ mem = 0
111
+ for key in safetensors.parameter_count:
112
+ mem += safetensors.parameter_count[key] * BYTES[key]
113
+
114
+ if key in ["I32", "U8"]:
115
+ num_parameters += safetensors.parameter_count[key] * KNOWN_SIZE_FACTOR[quant_method][bits]
116
+ params_b = round(num_parameters / 1e9, 2)
117
+ size_gb = round(mem / 1e9,2)
118
+ return params_b, size_gb
119
+ except Exception as e:
120
+ print(str(e))
121
+
122
+ filenames = [sib.rfilename for sib in model_info.siblings]
123
+ if "pytorch_model.bin" in filenames:
124
+ url = hf_hub_url(model_info.id, filename="pytorch_model.bin")
125
+ meta = get_hf_file_metadata(url)
126
+ params_b = round(meta.size * 2 / 1e9, 2)
127
+ size_gb = round(meta.size / 1e9, 2)
128
+ return params_b, size_gb
129
+
130
+ if "pytorch_model.bin.index.json" in filenames:
131
+ index_path = hf_hub_download(model_info.id, filename="pytorch_model.bin.index.json")
132
+ """
133
+ {
134
+ "metadata": {
135
+ "total_size": 28272820224
136
+ },....
137
+ """
138
+ size = json.load(open(index_path))
139
+ bytes_per_param = 2
140
+ if ("metadata" in size) and ("total_size" in size["metadata"]):
141
+ return round(size["metadata"]["total_size"] / bytes_per_param / 1e9, 2), \
142
+ round(size["metadata"]["total_size"] / 1e9, 2)
143
+
144
+ return None, None
145
+
146
  def get_model_arch(model_info: ModelInfo):
147
  return model_info.config.get("architectures", "Unknown")
148
 
src/submission/submit.py CHANGED
@@ -11,6 +11,7 @@ from src.submission.check_validity import (
11
  already_submitted_models,
12
  check_model_card,
13
  get_model_size,
 
14
  is_model_on_hub,
15
  is_gguf_on_hub,
16
  user_submission_permission,
@@ -95,10 +96,6 @@ def add_new_eval(
95
  except Exception:
96
  return styled_error("Could not get your model information. Please fill it up properly.")
97
 
98
-
99
- # ToDo: need to chek
100
- model_size = get_model_size(model_info=model_info, precision=precision)
101
-
102
  # Were the model card and license filled?
103
  try:
104
  if model_info.cardData is None:
@@ -146,15 +143,9 @@ def add_new_eval(
146
  if quant_type is None or quant_type == "":
147
  return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
148
 
149
- if precision in ["4bit", "8bit"]:
150
- model_params = model_size * 8
151
-
152
- if precision == "4bit":
153
- model_size = model_params * 0.5
154
-
155
- if precision == "8bit":
156
- model_size = model_params
157
-
158
 
159
  if quant_type == "llama.cpp":
160
  hardware = "cpu"
@@ -163,9 +154,6 @@ def add_new_eval(
163
  else:
164
  hardware = "gpu"
165
 
166
- # model = "/dataset/llama3_8b_instruct-chat-autoround-w4g128-gpu"
167
- # all on gpu
168
- # hardware = "gpu"
169
  if hardware == "gpu" and compute_dtype == "bfloat16":
170
  compute_dtype = "float16"
171
 
@@ -201,7 +189,6 @@ def add_new_eval(
201
  "created_at": created_at
202
  }
203
  print(eval_entry)
204
- print(supplementary_info)
205
 
206
  # ToDo: need open
207
  # Check for duplicate submission
 
11
  already_submitted_models,
12
  check_model_card,
13
  get_model_size,
14
+ get_quantized_model_parameters_memory,
15
  is_model_on_hub,
16
  is_gguf_on_hub,
17
  user_submission_permission,
 
96
  except Exception:
97
  return styled_error("Could not get your model information. Please fill it up properly.")
98
 
 
 
 
 
99
  # Were the model card and license filled?
100
  try:
101
  if model_info.cardData is None:
 
143
  if quant_type is None or quant_type == "":
144
  return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
145
 
146
+ model_params, model_size = get_quantized_model_parameters_memory(model_info,
147
+ quant_method=quant_type.lower(),
148
+ bits=precision)
 
 
 
 
 
 
149
 
150
  if quant_type == "llama.cpp":
151
  hardware = "cpu"
 
154
  else:
155
  hardware = "gpu"
156
 
 
 
 
157
  if hardware == "gpu" and compute_dtype == "bfloat16":
158
  compute_dtype = "float16"
159
 
 
189
  "created_at": created_at
190
  }
191
  print(eval_entry)
 
192
 
193
  # ToDo: need open
194
  # Check for duplicate submission