gsaivinay committed on
Commit
c4c8150
1 Parent(s): 6b66fa3
app.py CHANGED
@@ -218,22 +218,14 @@ def change_tab(query_param: str):
218
  # Searching and filtering
219
  def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, size_query: list, show_deleted: bool, query: str):
220
  filtered_df = filter_models(hidden_df, type_query, size_query, show_deleted)
221
- df = search_table(filtered_df, current_columns_df, query)
222
- df = select_columns(df, columns)
 
223
 
224
  return df
225
 
226
- def search_table(df: pd.DataFrame, current_columns_df: pd.DataFrame, query: str) -> pd.DataFrame:
227
- current_columns = current_columns_df.columns
228
- if AutoEvalColumn.model_type.name in current_columns:
229
- filtered_df = df[
230
- (df[AutoEvalColumn.dummy.name].str.contains(query, case=False))
231
- | (df[AutoEvalColumn.model_type.name].str.contains(query, case=False))
232
- ]
233
- else:
234
- filtered_df = df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
235
-
236
- return filtered_df
237
 
238
  def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
239
  always_here_cols = [
@@ -247,12 +239,13 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
247
  return filtered_df
248
 
249
  NUMERIC_INTERVALS = {
250
- "< 1.5B": (0, 1.5),
251
- "~3B": (1.5, 5),
252
- "~7B": (6, 11),
253
- "~13B": (12, 15),
254
- "~35B": (16, 55),
255
- "60B+": (55, 10000),
 
256
  }
257
 
258
  def filter_models(
@@ -267,9 +260,10 @@ def filter_models(
267
  type_emoji = [t[0] for t in type_query]
268
  filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
269
 
270
- numeric_interval = [NUMERIC_INTERVALS[s] for s in size_query]
271
  params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
272
- filtered_df = filtered_df.loc[params_column.between(numeric_interval[0][0], numeric_interval[-1][1])]
 
273
 
274
  return filtered_df
275
 
 
218
  # Searching and filtering
219
  def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, size_query: list, show_deleted: bool, query: str):
220
  filtered_df = filter_models(hidden_df, type_query, size_query, show_deleted)
221
+ if query != "":
222
+ filtered_df = search_table(filtered_df, query)
223
+ df = select_columns(filtered_df, columns)
224
 
225
  return df
226
 
227
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Return the rows of ``df`` whose search column contains ``query``.

    The column searched is the one named by ``AutoEvalColumn.dummy.name``.
    Matching is case-insensitive. ``query`` is first tried as a regular
    expression (the historical behaviour); if it is not a valid pattern
    (e.g. ``"("`` or ``"c++"``) it is matched as a literal substring instead
    of raising. Missing values in the column never match.
    """
    import re  # local import: only needed for the invalid-pattern fallback

    names = df[AutoEvalColumn.dummy.name]
    try:
        # na=False keeps the mask strictly boolean even when cells are NaN.
        mask = names.str.contains(query, case=False, na=False)
    except re.error:
        # User typed something that is not a valid regex: search it verbatim.
        mask = names.str.contains(query, case=False, regex=False, na=False)
    return df[mask]
 
 
 
 
 
 
 
 
 
229
 
230
  def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
231
  always_here_cols = [
 
239
  return filtered_df
240
 
241
  NUMERIC_INTERVALS = {
242
+ "Unknown": pd.Interval(-1, 0, closed="right"),
243
+ "< 1.5B": pd.Interval(0, 1.5, closed="right"),
244
+ "~3B": pd.Interval(1.5, 5, closed="right"),
245
+ "~7B": pd.Interval(6, 11, closed="right"),
246
+ "~13B": pd.Interval(12, 15, closed="right"),
247
+ "~35B": pd.Interval(16, 55, closed="right"),
248
+ "60B+": pd.Interval(55, 10000, closed="right"),
249
  }
250
 
251
  def filter_models(
 
260
  type_emoji = [t[0] for t in type_query]
261
  filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
262
 
263
+ numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
264
  params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
265
+ mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
266
+ filtered_df = filtered_df.loc[mask]
267
 
268
  return filtered_df
269
 
model_info_cache.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c307938f15bda18b6c38af3d02cc0407d9d8d5345bc31f475af2cbbb33a4f8b5
3
+ size 2895750
src/display_models/get_model_metadata.py CHANGED
@@ -2,6 +2,7 @@ import glob
2
  import json
3
  import os
4
  import re
 
5
  from typing import List
6
 
7
  import huggingface_hub
@@ -16,27 +17,43 @@ api = HfApi(token=os.environ.get("H4_TOKEN", None))
16
 
17
 
18
  def get_model_infos_from_hub(leaderboard_data: List[dict]):
 
 
 
 
 
 
 
19
  for model_data in tqdm(leaderboard_data):
20
  model_name = model_data["model_name_for_query"]
21
- try:
22
- model_info = api.model_info(model_name)
23
- except huggingface_hub.utils._errors.RepositoryNotFoundError:
24
- print("Repo not found!", model_name)
25
- model_data[AutoEvalColumn.license.name] = None
26
- model_data[AutoEvalColumn.likes.name] = None
27
- model_data[AutoEvalColumn.params.name] = get_model_size(model_name, None)
28
- continue
 
 
 
 
 
29
 
30
  model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
31
  model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
32
  model_data[AutoEvalColumn.params.name] = get_model_size(model_name, model_info)
 
 
 
 
33
 
34
 
35
  def get_model_license(model_info):
36
  try:
37
  return model_info.cardData["license"]
38
  except Exception:
39
- return None
40
 
41
 
42
  def get_model_likes(model_info):
@@ -56,7 +73,7 @@ def get_model_size(model_name, model_info):
56
  size = size_match.group(0)
57
  return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
58
  except AttributeError:
59
- return None
60
 
61
 
62
  def get_model_type(leaderboard_data: List[dict]):
 
2
  import json
3
  import os
4
  import re
5
+ import pickle
6
  from typing import List
7
 
8
  import huggingface_hub
 
17
 
18
 
19
  def get_model_infos_from_hub(leaderboard_data: List[dict]):
20
+ # load cache from disk
21
+ try:
22
+ with open("model_info_cache.pkl", "rb") as f:
23
+ model_info_cache = pickle.load(f)
24
+ except EOFError:
25
+ model_info_cache = {}
26
+
27
  for model_data in tqdm(leaderboard_data):
28
  model_name = model_data["model_name_for_query"]
29
+
30
+ if model_name in model_info_cache:
31
+ model_info = model_info_cache[model_name]
32
+ else:
33
+ try:
34
+ model_info = api.model_info(model_name)
35
+ model_info_cache[model_name] = model_info
36
+ except huggingface_hub.utils._errors.RepositoryNotFoundError:
37
+ print("Repo not found!", model_name)
38
+ model_data[AutoEvalColumn.license.name] = None
39
+ model_data[AutoEvalColumn.likes.name] = None
40
+ model_data[AutoEvalColumn.params.name] = get_model_size(model_name, None)
41
+ continue
42
 
43
  model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
44
  model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
45
  model_data[AutoEvalColumn.params.name] = get_model_size(model_name, model_info)
46
+
47
+ # save cache to disk in pickle format
48
+ with open("model_info_cache.pkl", "wb") as f:
49
+ pickle.dump(model_info_cache, f)
50
 
51
 
52
  def get_model_license(model_info):
53
  try:
54
  return model_info.cardData["license"]
55
  except Exception:
56
+ return "?"
57
 
58
 
59
  def get_model_likes(model_info):
 
73
  size = size_match.group(0)
74
  return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
75
  except AttributeError:
76
+ return 0
77
 
78
 
79
  def get_model_type(leaderboard_data: List[dict]):
src/display_models/model_metadata_type.py CHANGED
@@ -22,6 +22,8 @@ class ModelType(Enum):
22
 
23
  MODEL_TYPE_METADATA: Dict[str, ModelType] = {
24
  "tiiuae/falcon-180B": ModelType.PT,
 
 
25
  "Qwen/Qwen-7B": ModelType.PT,
26
  "Qwen/Qwen-7B-Chat": ModelType.RL,
27
  "notstoic/PygmalionCoT-7b": ModelType.IFT,
 
22
 
23
  MODEL_TYPE_METADATA: Dict[str, ModelType] = {
24
  "tiiuae/falcon-180B": ModelType.PT,
25
+ "tiiuae/falcon-180B-chat": ModelType.RL,
26
+ "microsoft/phi-1_5": ModelType.PT,
27
  "Qwen/Qwen-7B": ModelType.PT,
28
  "Qwen/Qwen-7B-Chat": ModelType.RL,
29
  "notstoic/PygmalionCoT-7b": ModelType.IFT,
src/display_models/read_results.py CHANGED
@@ -27,7 +27,7 @@ class EvalResult:
27
  results: dict
28
  precision: str = ""
29
  model_type: str = ""
30
- weight_type: str = ""
31
  date: str = ""
32
 
33
  def to_dict(self):
 
27
  results: dict
28
  precision: str = ""
29
  model_type: str = ""
30
+ weight_type: str = "Original"
31
  date: str = ""
32
 
33
  def to_dict(self):