gsaivinay committed on
Commit
1c7d9c0
•
2 Parent(s): 51678bf e5cbf2a

Merge branch 'main' of https://huggingface.co/spaces/gsaivinay/open_llm_leaderboard

Browse files
app.py CHANGED
@@ -109,6 +109,8 @@ leaderboard_df = original_df.copy()
109
  pending_eval_queue_df,
110
  ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
111
 
 
 
112
 
113
  ## INTERACTION FUNCTIONS
114
  def add_new_eval(
@@ -211,6 +213,8 @@ def change_tab(query_param: str):
211
 
212
 
213
  # Searching and filtering
 
 
214
  def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, precision_query: str, size_query: list, show_deleted: bool, query: str):
215
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
216
  if query != "":
@@ -245,6 +249,7 @@ NUMERIC_INTERVALS = {
245
 
246
  def filter_models(
247
  df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
 
248
  ) -> pd.DataFrame:
249
  # Show all models
250
  if show_deleted:
@@ -273,6 +278,12 @@ with demo:
273
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
274
  with gr.Row():
275
  with gr.Column():
 
 
 
 
 
 
276
  with gr.Row():
277
  search_bar = gr.Textbox(
278
  placeholder=" 🔍 Search for your model and press ENTER...",
@@ -339,6 +350,13 @@ with demo:
339
  interactive=True,
340
  elem_id="filter-columns-precision",
341
  )
 
 
 
 
 
 
 
342
  filter_columns_size = gr.CheckboxGroup(
343
  label="Model sizes",
344
  choices=list(NUMERIC_INTERVALS.keys()),
@@ -382,6 +400,7 @@ with demo:
382
  shown_columns,
383
  filter_columns_type,
384
  filter_columns_precision,
 
385
  filter_columns_size,
386
  deleted_models_visibility,
387
  search_bar,
@@ -396,6 +415,7 @@ with demo:
396
  shown_columns,
397
  filter_columns_type,
398
  filter_columns_precision,
 
399
  filter_columns_size,
400
  deleted_models_visibility,
401
  search_bar,
@@ -418,6 +438,22 @@ with demo:
418
  leaderboard_table,
419
  queue=True,
420
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  filter_columns_precision.change(
422
  update_table,
423
  [
@@ -441,6 +477,7 @@ with demo:
441
  shown_columns,
442
  filter_columns_type,
443
  filter_columns_precision,
 
444
  filter_columns_size,
445
  deleted_models_visibility,
446
  search_bar,
@@ -456,6 +493,7 @@ with demo:
456
  shown_columns,
457
  filter_columns_type,
458
  filter_columns_precision,
 
459
  filter_columns_size,
460
  deleted_models_visibility,
461
  search_bar,
 
109
  pending_eval_queue_df,
110
  ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
111
 
112
+ print(leaderboard_df["Precision"].unique())
113
+
114
 
115
  ## INTERACTION FUNCTIONS
116
  def add_new_eval(
 
213
 
214
 
215
  # Searching and filtering
216
+ def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, precision_query: str, size_query: list, show_deleted: bool, query: str):
217
+ filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
218
  def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, precision_query: str, size_query: list, show_deleted: bool, query: str):
219
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
220
  if query != "":
 
249
 
250
  def filter_models(
251
  df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
252
+ df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
253
  ) -> pd.DataFrame:
254
  # Show all models
255
  if show_deleted:
 
278
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
279
  with gr.Row():
280
  with gr.Column():
281
+ with gr.Row():
282
+ search_bar = gr.Textbox(
283
+ placeholder=" 🔍 Search for your model and press ENTER...",
284
+ show_label=False,
285
+ elem_id="search-bar",
286
+ )
287
  with gr.Row():
288
  search_bar = gr.Textbox(
289
  placeholder=" 🔍 Search for your model and press ENTER...",
 
350
  interactive=True,
351
  elem_id="filter-columns-precision",
352
  )
353
+ filter_columns_precision = gr.CheckboxGroup(
354
+ label="Precision",
355
+ choices=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
356
+ value=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
357
+ interactive=True,
358
+ elem_id="filter-columns-precision",
359
+ )
360
  filter_columns_size = gr.CheckboxGroup(
361
  label="Model sizes",
362
  choices=list(NUMERIC_INTERVALS.keys()),
 
400
  shown_columns,
401
  filter_columns_type,
402
  filter_columns_precision,
403
+ filter_columns_precision,
404
  filter_columns_size,
405
  deleted_models_visibility,
406
  search_bar,
 
415
  shown_columns,
416
  filter_columns_type,
417
  filter_columns_precision,
418
+ filter_columns_precision,
419
  filter_columns_size,
420
  deleted_models_visibility,
421
  search_bar,
 
438
  leaderboard_table,
439
  queue=True,
440
  )
441
+ filter_columns_precision.change(
442
+ update_table,
443
+ [
444
+ hidden_leaderboard_table_for_search,
445
+ leaderboard_table,
446
+ shown_columns,
447
+ filter_columns_type,
448
+ filter_columns_precision,
449
+ filter_columns_precision,
450
+ filter_columns_size,
451
+ deleted_models_visibility,
452
+ search_bar,
453
+ ],
454
+ leaderboard_table,
455
+ queue=True,
456
+ )
457
  filter_columns_precision.change(
458
  update_table,
459
  [
 
477
  shown_columns,
478
  filter_columns_type,
479
  filter_columns_precision,
480
+ filter_columns_precision,
481
  filter_columns_size,
482
  deleted_models_visibility,
483
  search_bar,
 
493
  shown_columns,
494
  filter_columns_type,
495
  filter_columns_precision,
496
+ filter_columns_precision,
497
  filter_columns_size,
498
  deleted_models_visibility,
499
  search_bar,
src/assets/text_content.py CHANGED
@@ -1,7 +1,7 @@
1
  from src.display_models.model_metadata_type import ModelType
2
 
3
  TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>
4
- <h2 align="center" id="space-title">This space displays GPT-4 and GPT-3.5 scores from [techinal paper](https://cdn.openai.com/papers/gpt-4.pdf)</h2>"""
5
 
6
  INTRODUCTION_TEXT = """
7
  πŸ“ The πŸ€— Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.
 
1
  from src.display_models.model_metadata_type import ModelType
2
 
3
  TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>
4
+ <h2 align="center" id="space-title">This space displays GPT-4 and GPT-3.5 scores from <a href="https://cdn.openai.com/papers/gpt-4.pdf" target="_blank" rel="noopener noreferrer">techinal paper</a></h2>"""
5
 
6
  INTRODUCTION_TEXT = """
7
  πŸ“ The πŸ€— Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.
src/display_models/get_model_metadata.py CHANGED
@@ -10,6 +10,8 @@ from huggingface_hub import HfApi
10
  from tqdm import tqdm
11
  from transformers import AutoModel, AutoConfig
12
  from accelerate import init_empty_weights
 
 
13
 
14
  from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
15
  from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
@@ -23,6 +25,7 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
23
  try:
24
  with open("model_info_cache.pkl", "rb") as f:
25
  model_info_cache = pickle.load(f)
 
26
  except (EOFError, FileNotFoundError):
27
  model_info_cache = {}
28
  try:
@@ -30,6 +33,11 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
30
  model_size_cache = pickle.load(f)
31
  except (EOFError, FileNotFoundError):
32
  model_size_cache = {}
 
 
 
 
 
33
 
34
  for model_data in tqdm(leaderboard_data):
35
  model_name = model_data["model_name_for_query"]
@@ -47,18 +55,26 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
47
  if model_name not in model_size_cache:
48
  model_size_cache[model_name] = get_model_size(model_name, None)
49
  model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
 
 
 
50
 
51
  model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
52
  model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
53
  if model_name not in model_size_cache:
54
  model_size_cache[model_name] = get_model_size(model_name, model_info)
55
  model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
 
 
 
56
 
57
  # save cache to disk in pickle format
58
  with open("model_info_cache.pkl", "wb") as f:
59
  pickle.dump(model_info_cache, f)
60
  with open("model_size_cache.pkl", "wb") as f:
61
  pickle.dump(model_size_cache, f)
 
 
62
 
63
 
64
  def get_model_license(model_info):
 
10
  from tqdm import tqdm
11
  from transformers import AutoModel, AutoConfig
12
  from accelerate import init_empty_weights
13
+ from transformers import AutoModel, AutoConfig
14
+ from accelerate import init_empty_weights
15
 
16
  from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
17
  from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
 
25
  try:
26
  with open("model_info_cache.pkl", "rb") as f:
27
  model_info_cache = pickle.load(f)
28
+ except (EOFError, FileNotFoundError):
29
  except (EOFError, FileNotFoundError):
30
  model_info_cache = {}
31
  try:
 
33
  model_size_cache = pickle.load(f)
34
  except (EOFError, FileNotFoundError):
35
  model_size_cache = {}
36
+ try:
37
+ with open("model_size_cache.pkl", "rb") as f:
38
+ model_size_cache = pickle.load(f)
39
+ except (EOFError, FileNotFoundError):
40
+ model_size_cache = {}
41
 
42
  for model_data in tqdm(leaderboard_data):
43
  model_name = model_data["model_name_for_query"]
 
55
  if model_name not in model_size_cache:
56
  model_size_cache[model_name] = get_model_size(model_name, None)
57
  model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
58
+ if model_name not in model_size_cache:
59
+ model_size_cache[model_name] = get_model_size(model_name, None)
60
+ model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
61
 
62
  model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
63
  model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
64
  if model_name not in model_size_cache:
65
  model_size_cache[model_name] = get_model_size(model_name, model_info)
66
  model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
67
+ if model_name not in model_size_cache:
68
+ model_size_cache[model_name] = get_model_size(model_name, model_info)
69
+ model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
70
 
71
  # save cache to disk in pickle format
72
  with open("model_info_cache.pkl", "wb") as f:
73
  pickle.dump(model_info_cache, f)
74
  with open("model_size_cache.pkl", "wb") as f:
75
  pickle.dump(model_size_cache, f)
76
+ with open("model_size_cache.pkl", "wb") as f:
77
+ pickle.dump(model_size_cache, f)
78
 
79
 
80
  def get_model_license(model_info):