Spaces:
Running
Running
types fix + mmluproru
Browse files
- app.py +1 -1
- src/display/utils.py +2 -1
- src/leaderboard/build_leaderboard.py +7 -3
app.py
CHANGED
@@ -185,7 +185,7 @@ def update_board():
|
|
185 |
# shutil.rmtree("./data")
|
186 |
download_dataset("Vikhrmodels/s-openbench-eval", "m_data")
|
187 |
import glob
|
188 |
-
data_list = [{"musicmc": 0.3021276595744681, "lawmc": 0.2800829875518672, "model": "apsys/saiga_3_8b", "moviesmc": 0.3472222222222222, "booksmc": 0.2800829875518672, "model_dtype": "torch.float16", "ppl": 0}]
|
189 |
for file in glob.glob("./m_data/model_data/external/*.json"):
|
190 |
with open(file) as f:
|
191 |
|
|
|
185 |
# shutil.rmtree("./data")
|
186 |
download_dataset("Vikhrmodels/s-openbench-eval", "m_data")
|
187 |
import glob
|
188 |
+
data_list = [{"musicmc": 0.3021276595744681, "lawmc": 0.2800829875518672, "model": "apsys/saiga_3_8b", "moviesmc": 0.3472222222222222, "booksmc": 0.2800829875518672, "model_dtype": "torch.float16", "ppl": 0, 'mmluproru':0}]
|
189 |
for file in glob.glob("./m_data/model_data/external/*.json"):
|
190 |
with open(file) as f:
|
191 |
|
src/display/utils.py
CHANGED
@@ -53,6 +53,7 @@ class Tasks(Enum):
|
|
53 |
movies = Task("moviesmc", "acc", "moviesmc")
|
54 |
music = Task("musicmc", "acc", "musicmc")
|
55 |
law = Task("lawmc", "acc", "lawmc")
|
|
|
56 |
|
57 |
|
58 |
# These classes are for user facing column names,
|
@@ -77,7 +78,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("model", "ma
|
|
77 |
for task in Tasks:
|
78 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
79 |
# # Model information
|
80 |
-
auto_eval_column_dict.append(["avg", ColumnContent, ColumnContent("Type", "number", 0)])
|
81 |
auto_eval_column_dict.append(["ppl", ColumnContent, ColumnContent("Type", "number", 0)])
|
82 |
auto_eval_column_dict.append(["model_dtype", ColumnContent, ColumnContent("Type", "number", 0)])
|
83 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
|
|
53 |
movies = Task("moviesmc", "acc", "moviesmc")
|
54 |
music = Task("musicmc", "acc", "musicmc")
|
55 |
law = Task("lawmc", "acc", "lawmc")
|
56 |
+
mmluproru = Task("mmluproru", "acc", "mmluproru")
|
57 |
|
58 |
|
59 |
# These classes are for user facing column names,
|
|
|
78 |
for task in Tasks:
|
79 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
80 |
# # Model information
|
81 |
+
auto_eval_column_dict.append(["avg ⬆️", ColumnContent, ColumnContent("Type", "number", 1,0,1)])
|
82 |
auto_eval_column_dict.append(["ppl", ColumnContent, ColumnContent("Type", "number", 0)])
|
83 |
auto_eval_column_dict.append(["model_dtype", ColumnContent, ColumnContent("Type", "number", 0)])
|
84 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
src/leaderboard/build_leaderboard.py
CHANGED
@@ -66,9 +66,13 @@ def build_leadearboard_df():
|
|
66 |
with open(f"{os.path.abspath(DATA_PATH)}/leaderboard.json", "r", encoding="utf-8") as eval_file:
|
67 |
f=json.load(eval_file)
|
68 |
print(f)
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
72 |
numeric_cols = leaderboard_df.select_dtypes(include=['number']).columns
|
73 |
leaderboard_df[numeric_cols] = leaderboard_df[numeric_cols].round(3)
|
74 |
return leaderboard_df.copy()
|
|
|
66 |
with open(f"{os.path.abspath(DATA_PATH)}/leaderboard.json", "r", encoding="utf-8") as eval_file:
|
67 |
f=json.load(eval_file)
|
68 |
print(f)
|
69 |
+
df = pd.DataFrame.from_records(f)
|
70 |
+
if 'mmluproru' in list(df.columns):
|
71 |
+
df['mmluproru'] = df['mmluproru'].fillna(0)
|
72 |
+
else:
|
73 |
+
df['mmluproru'] = 0
|
74 |
+
leaderboard_df = [['model','mmluproru','moviesmc','musicmc','lawmc','booksmc','model_dtype','ppl']]
|
75 |
+
leaderboard_df['avg'] = leaderboard_df[['moviesmc','musicmc','lawmc','booksmc','mmluproru']].mean(axis=1)
|
76 |
numeric_cols = leaderboard_df.select_dtypes(include=['number']).columns
|
77 |
leaderboard_df[numeric_cols] = leaderboard_df[numeric_cols].round(3)
|
78 |
return leaderboard_df.copy()
|