Spaces:
Running
Running
types fix + mmluproru
Browse files- data/leaderboard.json +1 -1
- src/display/utils.py +3 -3
- src/leaderboard/build_leaderboard.py +4 -2
data/leaderboard.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
[{"musicmc": 0.2936170212765957, "lawmc": 0.48094747682801237, "model": "apsys/saiga_3_8b", "moviesmc": 0.3402777777777778, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2872340425531915, "lawmc": 0.5066941297631308, "model": "vikhr-52-7b-chat-hf/apsys", "moviesmc": 0.4837962962962963, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2936170212765957, "lawmc": 0.5345005149330587, "model": "RefalMachine/llama3 ushanka", "moviesmc": 0.35185185185185186, "booksmc": 0.3257261410788382, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.3021276595744681, "lawmc": 0.544799176107106, "model": "IlyaGusev/saiga_llama3_8b", "moviesmc": 0.3958333333333333, "booksmc": 0.3381742738589212, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.251063829787234, "lawmc": 0.48712667353244077, "model": "apsys/vikhr-52-7b", "moviesmc": 0.4212962962962963, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.24468085106382978, "lawmc": 0.4788877445932029, "model": "apsys/vikhr-53-7b-32k", "moviesmc": 0.4050925925925926, "booksmc": 0.3049792531120332, "model_dtype": "torch.float16", "ppl": 0}]
|
|
|
1 |
+
[{"musicmc": 0.2936170212765957, "lawmc": 0.48094747682801237, "model": "apsys/saiga_3_8b", "moviesmc": 0.3402777777777778, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2723404255319149, "lawmc": 0.4850669412976313, "model": "Nexusflow/Starling-LM-7B-beta", "moviesmc": 0.38657407407407407, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.20851063829787234, "lawmc": 0.47167868177136973, "model": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", "moviesmc": 0.3055555555555556, "booksmc": 0.26141078838174275, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.3021276595744681, "lawmc": 0.544799176107106, "model": "alexwortega/saiga_submit", "moviesmc": 0.3958333333333333, "booksmc": 0.3381742738589212, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.2872340425531915, "lawmc": 0.5066941297631308, "model": "vikhr-52-7b-chat-hf/apsys", "moviesmc": 0.4837962962962963, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.20851063829787234, "lawmc": 0.42636457260556127, "model": "cohere/aya-8b", "moviesmc": 0.3287037037037037, "booksmc": 0.24273858921161826, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.24680851063829787, "lawmc": 0.48712667353244077, "model": "lightblue/suzume-llama-3-8B-multilingual", "moviesmc": 0.3587962962962963, "booksmc": 0.2966804979253112, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2936170212765957, "lawmc": 0.5345005149330587, "model": "RefalMachine/llama3 ushanka", "moviesmc": 0.35185185185185186, "booksmc": 0.3257261410788382, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.28297872340425534, "lawmc": 0.5406797116374872, "model": "microsoft/Phi-3-medium-4k-instruct", "moviesmc": 0.42824074074074076, "booksmc": 0.3817427385892116, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.3021276595744681, "lawmc": 0.544799176107106, "model": "IlyaGusev/saiga_llama3_8b", "moviesmc": 0.3958333333333333, "booksmc": 0.3381742738589212, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.251063829787234, "lawmc": 0.48712667353244077, "model": "apsys/vikhr-52-7b", "moviesmc": 0.4212962962962963, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.24468085106382978, "lawmc": 0.4788877445932029, "model": "apsys/vikhr-53-7b-32k", "moviesmc": 0.4050925925925926, "booksmc": 0.3049792531120332, "model_dtype": "torch.float16", "ppl": 0}]
|
src/display/utils.py
CHANGED
@@ -78,9 +78,9 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("model", "ma
|
|
78 |
for task in Tasks:
|
79 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
80 |
# # Model information
|
81 |
-
auto_eval_column_dict.append(["avg", ColumnContent, ColumnContent("
|
82 |
-
auto_eval_column_dict.append(["ppl", ColumnContent, ColumnContent("
|
83 |
-
auto_eval_column_dict.append(["model_dtype", ColumnContent, ColumnContent("
|
84 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
85 |
# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
86 |
# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
|
|
78 |
for task in Tasks:
|
79 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
80 |
# # Model information
|
81 |
+
auto_eval_column_dict.append(["avg", ColumnContent, ColumnContent("avg", "number", 1,0,1)])
|
82 |
+
auto_eval_column_dict.append(["ppl", ColumnContent, ColumnContent("ppl", "number", 0)])
|
83 |
+
auto_eval_column_dict.append(["model_dtype", ColumnContent, ColumnContent("model_dtype", "number", 0)])
|
84 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
85 |
# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
86 |
# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
src/leaderboard/build_leaderboard.py
CHANGED
@@ -72,8 +72,10 @@ def build_leadearboard_df():
|
|
72 |
else:
|
73 |
df['mmluproru'] = 0
|
74 |
leaderboard_df = df[['model','mmluproru','moviesmc','musicmc','lawmc','booksmc','model_dtype','ppl']]
|
75 |
-
leaderboard_df['avg'] = leaderboard_df[['moviesmc','musicmc','lawmc','booksmc','mmluproru']].mean(axis=1)
|
76 |
-
leaderboard_df.
|
|
|
77 |
numeric_cols = leaderboard_df.select_dtypes(include=['number']).columns
|
|
|
78 |
leaderboard_df[numeric_cols] = leaderboard_df[numeric_cols].round(3)
|
79 |
return leaderboard_df.copy()
|
|
|
72 |
else:
|
73 |
df['mmluproru'] = 0
|
74 |
leaderboard_df = df[['model','mmluproru','moviesmc','musicmc','lawmc','booksmc','model_dtype','ppl']]
|
75 |
+
leaderboard_df['avg'] = leaderboard_df[['moviesmc','musicmc','lawmc','booksmc','mmluproru']].mean(axis=1).values
|
76 |
+
# print(leaderboard_df.columns)
|
77 |
+
leaderboard_df.sort_values(by='avg',ascending=False,inplace=True,axis=0)
|
78 |
numeric_cols = leaderboard_df.select_dtypes(include=['number']).columns
|
79 |
+
# print(numeric_cols)
|
80 |
leaderboard_df[numeric_cols] = leaderboard_df[numeric_cols].round(3)
|
81 |
return leaderboard_df.copy()
|