Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Fix graphs
Browse files- app.py +5 -3
- src/display/utils.py +1 -0
- src/populate.py +3 -0
- style.css +6 -0
app.py
CHANGED
@@ -165,7 +165,9 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
|
165 |
|
166 |
# Exclude 'always_here_cols' entries from 'columns' to avoid duplication
|
167 |
columns = [c for c in columns if c not in always_here_cols]
|
168 |
-
new_columns =
|
|
|
|
|
169 |
|
170 |
# Remove duplicates while preserving order
|
171 |
seen = set()
|
@@ -306,7 +308,7 @@ def toggle_all_categories(action: str) -> list[gr.CheckboxGroup]:
|
|
306 |
|
307 |
|
308 |
def plot_size_vs_score(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
|
309 |
-
df2 = hidden_df.
|
310 |
df2 = df2[df2["#Params (B)"] > 0]
|
311 |
df2 = df2[["model_name_for_query", "#Params (B)", "AVG", "Few-shot"]]
|
312 |
df2["AVG"] = df2["AVG"].astype(float)
|
@@ -333,7 +335,7 @@ TASK_AVG_NAME_MAP = {
|
|
333 |
|
334 |
|
335 |
def plot_average_scores(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
|
336 |
-
df2 = hidden_df.
|
337 |
df2 = df2[["model_name_for_query", "Few-shot"] + list(TASK_AVG_NAME_MAP.keys())]
|
338 |
df2 = df2.rename(columns={"model_name_for_query": "Model", "Few-shot": "n-shot"})
|
339 |
df2 = df2.rename(columns=TASK_AVG_NAME_MAP)
|
|
|
165 |
|
166 |
# Exclude 'always_here_cols' entries from 'columns' to avoid duplication
|
167 |
columns = [c for c in columns if c not in always_here_cols]
|
168 |
+
new_columns = (
|
169 |
+
always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.row_id.name]
|
170 |
+
)
|
171 |
|
172 |
# Remove duplicates while preserving order
|
173 |
seen = set()
|
|
|
308 |
|
309 |
|
310 |
def plot_size_vs_score(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
|
311 |
+
df2 = hidden_df[hidden_df[AutoEvalColumn.row_id.name].isin(df[AutoEvalColumn.row_id.name])]
|
312 |
df2 = df2[df2["#Params (B)"] > 0]
|
313 |
df2 = df2[["model_name_for_query", "#Params (B)", "AVG", "Few-shot"]]
|
314 |
df2["AVG"] = df2["AVG"].astype(float)
|
|
|
335 |
|
336 |
|
337 |
def plot_average_scores(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
|
338 |
+
df2 = hidden_df[hidden_df[AutoEvalColumn.row_id.name].isin(df[AutoEvalColumn.row_id.name])]
|
339 |
df2 = df2[["model_name_for_query", "Few-shot"] + list(TASK_AVG_NAME_MAP.keys())]
|
340 |
df2 = df2.rename(columns={"model_name_for_query": "Model", "Few-shot": "n-shot"})
|
341 |
df2 = df2.rename(columns=TASK_AVG_NAME_MAP)
|
src/display/utils.py
CHANGED
@@ -63,6 +63,7 @@ auto_eval_column_dict.append(
|
|
63 |
)
|
64 |
auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False, dummy=True)])
|
65 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
|
|
66 |
|
67 |
# We use make dataclass to dynamically fill the scores from Tasks
|
68 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
63 |
)
|
64 |
auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False, dummy=True)])
|
65 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
66 |
+
auto_eval_column_dict.append(["row_id", ColumnContent, ColumnContent("ID", "number", False, dummy=True)])
|
67 |
|
68 |
# We use make dataclass to dynamically fill the scores from Tasks
|
69 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
src/populate.py
CHANGED
@@ -15,6 +15,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
|
|
|
|
|
|
|
18 |
score_cols = [
|
19 |
"ALT E to J BLEU",
|
20 |
"ALT J to E BLEU",
|
|
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
|
18 |
+
# Add a row ID column
|
19 |
+
df[AutoEvalColumn.row_id.name] = range(len(df))
|
20 |
+
|
21 |
score_cols = [
|
22 |
"ALT E to J BLEU",
|
23 |
"ALT J to E BLEU",
|
style.css
CHANGED
@@ -135,3 +135,9 @@
|
|
135 |
flex-direction: row;
|
136 |
align-items: center;
|
137 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
flex-direction: row;
|
136 |
align-items: center;
|
137 |
}
|
138 |
+
|
139 |
+
/* Hides the final AutoEvalColumn */
|
140 |
+
#llm-benchmark-tab-table table td:last-child,
|
141 |
+
#llm-benchmark-tab-table table th:last-child {
|
142 |
+
display: none;
|
143 |
+
}
|