Spaces:
Running
Running
Commit
•
26ef426
1
Parent(s):
662ed4b
Implement login for GPQA Details
Browse files
- README.md +1 -0
- app.py +9 -7
- src/constants.py +3 -3
- src/details.py +10 -5
README.md
CHANGED
@@ -6,6 +6,7 @@ colorTo: green
|
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.1
|
8 |
app_file: app.py
|
|
|
9 |
pinned: false
|
10 |
short_description: Compare Open LLM Leaderboard results
|
11 |
datasets:
|
|
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.1
|
8 |
app_file: app.py
|
9 |
+
hf_oauth: true
|
10 |
pinned: false
|
11 |
short_description: Compare Open LLM Leaderboard results
|
12 |
datasets:
|
app.py
CHANGED
@@ -82,7 +82,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
82 |
configs = gr.HTML()
|
83 |
with gr.Tab("Details"):
|
84 |
details_task = gr.Radio(
|
85 |
-
|
86 |
label="Tasks",
|
87 |
info="Evaluation tasks to be loaded",
|
88 |
interactive=True,
|
@@ -91,11 +91,13 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
91 |
label="Task Description",
|
92 |
lines=3,
|
93 |
)
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
99 |
load_details_btn = gr.Button("Load Details", interactive=False)
|
100 |
clear_details_btn = gr.Button("Clear Details")
|
101 |
sample_idx = gr.Number(
|
@@ -166,7 +168,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
166 |
).then(
|
167 |
fn=update_subtasks_component,
|
168 |
inputs=details_task,
|
169 |
-
outputs=subtask,
|
170 |
)
|
171 |
gr.on(
|
172 |
triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
|
|
|
82 |
configs = gr.HTML()
|
83 |
with gr.Tab("Details"):
|
84 |
details_task = gr.Radio(
|
85 |
+
list(constants.TASKS.values()),
|
86 |
label="Tasks",
|
87 |
info="Evaluation tasks to be loaded",
|
88 |
interactive=True,
|
|
|
91 |
label="Task Description",
|
92 |
lines=3,
|
93 |
)
|
94 |
+
with gr.Row():
|
95 |
+
login_btn = gr.LoginButton(size="sm", visible=False)
|
96 |
+
subtask = gr.Radio(
|
97 |
+
choices=None, # constants.SUBTASKS.get(details_task.value),
|
98 |
+
label="Subtasks",
|
99 |
+
info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
|
100 |
+
)
|
101 |
load_details_btn = gr.Button("Load Details", interactive=False)
|
102 |
clear_details_btn = gr.Button("Clear Details")
|
103 |
sample_idx = gr.Number(
|
|
|
168 |
).then(
|
169 |
fn=update_subtasks_component,
|
170 |
inputs=details_task,
|
171 |
+
outputs=[login_btn, subtask],
|
172 |
)
|
173 |
gr.on(
|
174 |
triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
|
src/constants.py
CHANGED
@@ -39,9 +39,9 @@ SUBTASKS = {
|
|
39 |
("Web of Lies", "leaderboard_bbh_web_of_lies"),
|
40 |
],
|
41 |
"leaderboard_gpqa": [
|
42 |
-
"leaderboard_gpqa_extended",
|
43 |
-
"leaderboard_gpqa_diamond",
|
44 |
-
"leaderboard_gpqa_main",
|
45 |
],
|
46 |
"leaderboard_ifeval": [("IFEval", "leaderboard_ifeval")],
|
47 |
# "leaderboard_math_hard": [
|
|
|
39 |
("Web of Lies", "leaderboard_bbh_web_of_lies"),
|
40 |
],
|
41 |
"leaderboard_gpqa": [
|
42 |
+
("Extended", "leaderboard_gpqa_extended"),
|
43 |
+
("Diamond", "leaderboard_gpqa_diamond"),
|
44 |
+
("Main", "leaderboard_gpqa_main"),
|
45 |
],
|
46 |
"leaderboard_ifeval": [("IFEval", "leaderboard_ifeval")],
|
47 |
# "leaderboard_math_hard": [
|
src/details.py
CHANGED
@@ -20,11 +20,16 @@ def update_task_description_component(task):
|
|
20 |
)
|
21 |
|
22 |
|
23 |
-
def update_subtasks_component(task):
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
28 |
)
|
29 |
|
30 |
|
|
|
20 |
)
|
21 |
|
22 |
|
23 |
+
def update_subtasks_component(task, profile: gr.OAuthProfile | None):
|
24 |
+
visible_login_btn = True if task == "leaderboard_gpqa" else False
|
25 |
+
subtasks = None if task == "leaderboard_gpqa" and not profile else constants.SUBTASKS.get(task)
|
26 |
+
return (
|
27 |
+
gr.LoginButton(size="sm", visible=visible_login_btn),
|
28 |
+
gr.Radio(
|
29 |
+
choices=subtasks,
|
30 |
+
info="Evaluation subtasks to be loaded",
|
31 |
+
value=None,
|
32 |
+
),
|
33 |
)
|
34 |
|
35 |
|