Spaces:
Paused
Paused
Clémentine
committed on
Commit
·
e295ac3
1
Parent(s):
adb0416
added automatic update of the best LLM models
Browse files- app.py +3 -1
- requirements.txt +1 -1
- src/manage_collections.py +65 -0
app.py
CHANGED
@@ -27,6 +27,7 @@ from src.display_models.utils import (
|
|
27 |
styled_message,
|
28 |
styled_warning,
|
29 |
)
|
|
|
30 |
from src.load_from_hub import get_all_requested_models, get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub
|
31 |
from src.rate_limiting import user_submission_permission
|
32 |
|
@@ -88,6 +89,7 @@ snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="
|
|
88 |
requested_models, users_to_submission_dates = get_all_requested_models(EVAL_REQUESTS_PATH)
|
89 |
|
90 |
original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
|
|
|
91 |
leaderboard_df = original_df.copy()
|
92 |
|
93 |
models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
|
@@ -306,7 +308,7 @@ def filter_models(
|
|
306 |
if show_deleted:
|
307 |
filtered_df = df
|
308 |
else: # Show only still on the hub models
|
309 |
-
filtered_df = df[df[AutoEvalColumn.still_on_hub.name]
|
310 |
|
311 |
type_emoji = [t[0] for t in type_query]
|
312 |
filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
|
|
|
27 |
styled_message,
|
28 |
styled_warning,
|
29 |
)
|
30 |
+
from src.manage_collections import update_collections
|
31 |
from src.load_from_hub import get_all_requested_models, get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub
|
32 |
from src.rate_limiting import user_submission_permission
|
33 |
|
|
|
89 |
requested_models, users_to_submission_dates = get_all_requested_models(EVAL_REQUESTS_PATH)
|
90 |
|
91 |
original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
|
92 |
+
update_collections(original_df.copy())
|
93 |
leaderboard_df = original_df.copy()
|
94 |
|
95 |
models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
|
|
|
308 |
if show_deleted:
|
309 |
filtered_df = df
|
310 |
else: # Show only still on the hub models
|
311 |
+
filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
|
312 |
|
313 |
type_emoji = [t[0] for t in type_query]
|
314 |
filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
|
requirements.txt
CHANGED
@@ -25,7 +25,7 @@ gradio-client==0.5.0
|
|
25 |
h11==0.14.0
|
26 |
httpcore==0.17.0
|
27 |
httpx==0.24.0
|
28 |
-
huggingface-hub==0.
|
29 |
idna==3.4
|
30 |
Jinja2==3.1.2
|
31 |
jsonschema==4.17.3
|
|
|
25 |
h11==0.14.0
|
26 |
httpcore==0.17.0
|
27 |
httpx==0.24.0
|
28 |
+
huggingface-hub==0.18.0
|
29 |
idna==3.4
|
30 |
Jinja2==3.1.2
|
31 |
jsonschema==4.17.3
|
src/manage_collections.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
from pandas import DataFrame
|
4 |
+
from huggingface_hub import get_collection, add_collection_item, delete_collection_item
|
5 |
+
from huggingface_hub.utils._errors import HfHubHTTPError
|
6 |
+
|
7 |
+
from src.display_models.model_metadata_type import ModelType
|
8 |
+
from src.display_models.utils import AutoEvalColumn
|
9 |
+
|
10 |
+
# Hub access token, read from the environment; None when H4_TOKEN is unset.
H4_TOKEN = os.environ.get("H4_TOKEN")

# Slug of the Hub collection passed to the collection API calls.
path_to_collection = "HuggingFaceH4/current-best-models-of-the-open-llm-leaderboard-652d64cf619fc62beef5c2a3"

# Size buckets keyed by their display label. Only "1B" includes its upper
# bound; every other bucket is open on both sides.
# NOTE(review): bounds are presumably billions of parameters -- confirm.
intervals = {
    label: pd.Interval(lower, upper, closed=closed)
    for label, lower, upper, closed in (
        ("1B", 0, 1.5, "right"),
        ("3B", 2.5, 3.5, "neither"),
        ("7B", 6, 8, "neither"),
        ("13B", 10, 14, "neither"),
        ("30B", 25, 35, "neither"),
        ("60B", 55, 65, "neither"),
    )
}
|
21 |
+
|
22 |
+
def update_collections(df: DataFrame):
    """Refresh the Hub collection of the current best leaderboard models.

    For every member of ``ModelType`` (members whose ``value.name`` is empty
    are skipped) and every size bucket in ``intervals``, the candidates are
    sorted by descending average score and tried in order: the first model
    that ``add_collection_item`` accepts is kept and the rest of that bucket
    is ignored. Once all buckets are processed, every item of the collection
    that is not one of the kept models is deleted.

    Args:
        df: Leaderboard dataframe. It is read through the ``AutoEvalColumn``
            column names ``params``, ``model_type_symbol``, ``average`` and
            ``dummy`` (the ``dummy`` values are used as Hub model ids).

    Returns:
        None. The function only acts on the Hub collection.

    Note:
        ``HfHubHTTPError`` raised while adding a model is logged and the next
        candidate is tried. Errors raised by ``get_collection`` or
        ``delete_collection_item`` are not caught here.
    """
    import logging  # local import so the block does not depend on file-level changes

    logger = logging.getLogger(__name__)

    # Non-numeric sizes become NaN and therefore never fall inside an interval.
    params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
    symbol_column = df[AutoEvalColumn.model_type_symbol.name]

    # The size masks do not depend on the model type: build each one once.
    # astype(bool) keeps the mask boolean even when the dataframe is empty.
    size_masks = {
        size: params_column.apply(lambda n_params: n_params in interval).astype(bool)
        for size, interval in intervals.items()
    }

    cur_best_models = set()

    for model_type in ModelType:
        if model_type.value.name == "":
            continue

        type_emoji = [t[0] for t in model_type.value.symbol]
        type_mask = symbol_column.isin(type_emoji)

        for size, size_mask in size_masks.items():
            # Both masks share df's index, so no index alignment is involved.
            candidates = df.loc[type_mask & size_mask]
            best_models = list(
                candidates.sort_values(AutoEvalColumn.average.name, ascending=False)[AutoEvalColumn.dummy.name]
            )

            # Try candidates from best to worst until the Hub accepts one.
            # We could use get_collection to grab the id of the last item and
            # reposition it with update_collection, but that is costly.
            # Dropping exists_ok would also let us refresh the note, e.g. with
            # the date the model first appeared.
            for model in best_models:
                try:
                    add_collection_item(
                        path_to_collection,
                        item_id=model,
                        item_type="model",
                        exists_ok=True,
                        note=f"Best {model_type.to_str(' ')} model of {size} on the leaderboard today!",
                        token=H4_TOKEN,
                    )
                except HfHubHTTPError as err:
                    # Best effort: the Hub rejected this model, fall back to the next one.
                    logger.warning("Could not add %s to the collection: %s", model, err)
                    continue
                cur_best_models.add(model)
                break

    if not cur_best_models:
        # Nothing was added (empty leaderboard or every request rejected):
        # purging now would wipe the whole collection, so leave it untouched.
        logger.warning("No best model could be added; leaving the collection unchanged.")
        return

    # Drop the items that are no longer the best model of any category.
    collection = get_collection(path_to_collection, token=H4_TOKEN)
    for item in collection.items:
        if item.item_id not in cur_best_models:
            delete_collection_item(
                collection_slug=path_to_collection,
                item_object_id=item.item_object_id,
                token=H4_TOKEN,
            )
|
65 |
+
|