Update space
Browse files
- src/display/utils.py +1 -1
- src/leaderboard/read_evals.py +6 -4
- src/populate.py +1 -1
src/display/utils.py
CHANGED
@@ -59,7 +59,7 @@ for domain in Domains:
|
|
59 |
|
60 |
auto_eval_column_dict.append(["organization", ColumnContent, field(default_factory=lambda: ColumnContent("Organization", "str", False))])
|
61 |
auto_eval_column_dict.append(["knowledge_cutoff", ColumnContent, field(default_factory=lambda: ColumnContent("Knowledge cutoff", "str", False))])
|
62 |
-
|
63 |
|
64 |
for task in Tasks:
|
65 |
auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda: ColumnContent(task.value.col_name, "number", True))])
|
|
|
59 |
|
60 |
auto_eval_column_dict.append(["organization", ColumnContent, field(default_factory=lambda: ColumnContent("Organization", "str", False))])
|
61 |
auto_eval_column_dict.append(["knowledge_cutoff", ColumnContent, field(default_factory=lambda: ColumnContent("Knowledge cutoff", "str", False))])
|
62 |
+
auto_eval_column_dict.append(["score", ColumnContent, field(default_factory=lambda: ColumnContent("Score", "number", True))])
|
63 |
|
64 |
for task in Tasks:
|
65 |
auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda: ColumnContent(task.value.col_name, "number", True))])
|
src/leaderboard/read_evals.py
CHANGED
@@ -34,11 +34,13 @@ class ModelResult:
|
|
34 |
license = config.get("license")
|
35 |
knowledge_cutoff = config.get("knowledge_cutoff")
|
36 |
|
|
|
|
|
37 |
# Extract results available in this file (some results are split in several files)
|
38 |
results = {}
|
39 |
for domain in Domains:
|
40 |
domain = domain.value
|
41 |
-
results[domain.dimension] =
|
42 |
|
43 |
return self(
|
44 |
eval_name=f"{org}_{model}",
|
@@ -53,13 +55,13 @@ class ModelResult:
|
|
53 |
def to_dict(self):
|
54 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
55 |
|
56 |
-
#
|
57 |
-
average = 1
|
58 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
59 |
data_dict = {
|
60 |
-
"eval_name": self.eval_name, # not a column, just a save name,
|
61 |
# AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
62 |
AutoEvalColumn.model.name: self.model,
|
|
|
63 |
AutoEvalColumn.license.name: self.license,
|
64 |
AutoEvalColumn.organization.name: self.org,
|
65 |
AutoEvalColumn.knowledge_cutoff.name: self.knowledge_cutoff,
|
|
|
34 |
license = config.get("license")
|
35 |
knowledge_cutoff = config.get("knowledge_cutoff")
|
36 |
|
37 |
+
model_results = data.get("results")
|
38 |
+
|
39 |
# Extract results available in this file (some results are split in several files)
|
40 |
results = {}
|
41 |
for domain in Domains:
|
42 |
domain = domain.value
|
43 |
+
results[domain.dimension] = model_results.get(domain.dimension).get(domain.metric, None)
|
44 |
|
45 |
return self(
|
46 |
eval_name=f"{org}_{model}",
|
|
|
55 |
def to_dict(self):
|
56 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
57 |
|
58 |
+
# score = 1 / self.results[Domains.dim0.dimension] if self.results[Domains.dim0.dimension] != 0 else 0
|
|
|
59 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
60 |
data_dict = {
|
61 |
+
# "eval_name": self.eval_name, # not a column, just a save name,
|
62 |
# AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
63 |
AutoEvalColumn.model.name: self.model,
|
64 |
+
AutoEvalColumn.score.name: self.results[Domains.dim0.value.dimension],
|
65 |
AutoEvalColumn.license.name: self.license,
|
66 |
AutoEvalColumn.organization.name: self.org,
|
67 |
AutoEvalColumn.knowledge_cutoff.name: self.knowledge_cutoff,
|
src/populate.py
CHANGED
@@ -14,7 +14,7 @@ def get_model_leaderboard_df(results_path: str, requests_path: str="", cols: lis
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
-
|
18 |
# print(cols) # []
|
19 |
# print(df.columns) # ['eval_name', 'Model', 'Hub License', 'Organization', 'Knowledge cutoff', 'Overall']
|
20 |
# exit()
|
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
+
df = df.sort_values(by=[AutoEvalColumn.score.name], ascending=True)
|
18 |
# print(cols) # []
|
19 |
# print(df.columns) # ['eval_name', 'Model', 'Hub License', 'Organization', 'Knowledge cutoff', 'Overall']
|
20 |
# exit()
|