# DomainEval / app.py
# (hosting-page header captured with the source:
#  zhuqiming · "Update space" · d05cebd · raw · history blame · 6.5 kB)
import json
import gradio as gr
import pandas as pd
from css_html import custom_css
from text_content import CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT, HEAD_TEXT
from utils import (
AutoEvalColumn,
fields,
)
# Location of the JSON file holding the evaluation results, relative to the
# working directory.
result_path = './results.json'

# Labels offered by the Pass@k selector, one per evaluation setting.
task_type = [
    "Pass@1 (Greedy Search N=1 Temperature=0.0)",
    "Pass@5 (Sampling Search N=5 Temperature=0.2)",
]

# Short task identifiers. `cur_task` is matched as a substring against the
# entries of `task_type` to pick the selector's initial value; `next_task`
# is not referenced in the visible part of this file.
cur_task, next_task = "Pass@1", "Pass@5"
def data_convert(data_pass_k: list):
    """Pivot flat pass@k records into a leaderboard table.

    Each record is a mapping with the keys ``"Model"``, ``"Domain"`` and
    ``"Pass_at_k"``. The score is converted to a percentage
    (``Pass_at_k * 100``) and rounded to two decimals.

    Args:
        data_pass_k: Iterable of records as described above. May be empty.

    Returns:
        A ``pandas.DataFrame`` indexed by model name whose first column is
        ``"Model"`` followed by one column per domain, in first-seen order.
        When a ``"Mean"`` column is present the rows are sorted by it in
        descending order; otherwise the rows are returned unsorted.

    Raises:
        KeyError: If a record lacks one of the required keys.
        ValueError: If the same model appears twice for one domain.
    """
    table = {"Model": {}}
    for record in data_pass_k:
        model_name = record["Model"]
        domain = record["Domain"]
        pass_at_k = record["Pass_at_k"]

        scores = table.setdefault(domain, {})
        # Explicit raise instead of `assert`, which disappears under `-O`.
        if model_name in scores:
            raise ValueError(
                f"duplicate result for model {model_name!r} in domain {domain!r}"
            )
        scores[model_name] = round(pass_at_k * 100, 2)
        table["Model"][model_name] = model_name

    frame = pd.DataFrame(table)
    # "Mean" is just another domain in the input; without it (e.g. empty
    # input) there is nothing to rank by, so skip the sort rather than fail.
    if "Mean" in frame.columns:
        frame = frame.sort_values(by="Mean", ascending=False)
    return frame
# Load the raw results once at import time. The encoding is given explicitly
# so decoding does not depend on the host's locale.
with open(result_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# One table per evaluation setting: `df` is the one displayed first,
# `df_next` is the alternate table swapped in by the task selector.
df = data_convert(data["pass_1"])
df_next = data_convert(data["pass_5"])

# Column metadata derived from AutoEvalColumn. Hidden fields are excluded
# everywhere; the *_LITE variants additionally keep only the fields flagged
# as displayed by default.
_visible_fields = [c for c in fields(AutoEvalColumn) if not c.hidden]
_default_fields = [c for c in _visible_fields if c.displayed_by_default]

COLS = [c.name for c in _visible_fields]
TYPES = [c.type for c in _visible_fields]
COLS_LITE = [c.name for c in _default_fields]
TYPES_LITE = [c.type for c in _default_fields]
def select_columns(df, columns):
    """Return ``df`` restricted to the model column plus the chosen columns.

    Args:
        df: Table to filter; must contain the model-name column.
        columns: Collection of column names to keep.

    Returns:
        A view of ``df`` with the model-name column first, followed by every
        entry of ``COLS`` that exists in ``df`` and is listed in ``columns``.
    """
    pinned = [AutoEvalColumn.model.name]

    # Walk COLS (not `columns`) so the output keeps the canonical ordering
    # regardless of the order in which the caller listed the columns.
    chosen = []
    for name in COLS:
        if name in df.columns and name in columns:
            chosen.append(name)

    return df[pinned + chosen]
def select_tasks(df, columns, df_next):
    """Swap the active and alternate tables and filter the new active one.

    The function is not told which task was selected; every call simply
    exchanges the two tables it is given.

    Args:
        df: Table that was active before the call.
        columns: Collection of column names to keep in the filtered result.
        df_next: Table that was the alternate before the call.

    Returns:
        A 3-tuple ``(active, filtered, alternate)`` where ``active`` is the
        former ``df_next``, ``filtered`` is ``active`` restricted to the
        model-name column plus the chosen columns (in ``COLS`` order), and
        ``alternate`` is the former ``df``.
    """
    active, alternate = df_next, df

    pinned = [AutoEvalColumn.model.name]
    chosen = []
    for name in COLS:
        if name in active.columns and name in columns:
            chosen.append(name)
    filtered = active[pinned + chosen]

    return active, filtered, alternate
# Build the leaderboard page: title and intro, a task selector, a domain
# selector, the visible table, two hidden tables holding the full data for
# both Pass@k settings, and the notes / citation / acknowledgement sections.
#
# NOTE(review): the source this was recovered from had lost its indentation,
# so the nesting of the layout containers below is reconstructed — confirm
# it against the deployed layout.
# The tab and accordion labels contained mis-decoded emoji bytes; they are
# restored here to the emoji they most plausibly encoded.
demo = gr.Blocks(css=custom_css)
with demo:
    with gr.Column():
        gr.Markdown(
            '<div style="text-align: center;">'
            '<h1>DomainEval Leaderboard</h1></div>'
            '<br>',
            elem_classes="markdown-text",
        )
        gr.Markdown(HEAD_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("🔍 Evaluation Table", id=0):
                    with gr.Column():
                        with gr.Accordion("⏬ Pass@k", open=True):
                            shown_tasks = gr.Radio(
                                choices=list(task_type),
                                # First label mentioning the current task,
                                # or no preselection if none matches.
                                value=next(
                                    (c for c in task_type if cur_task in c),
                                    None,
                                ),
                                label="",
                                elem_id="task-select",
                                interactive=True,
                            )

                        # The model column is always shown, so it is not
                        # offered as a selectable domain.
                        model_col = AutoEvalColumn.model.name
                        with gr.Accordion("⏬ Domains", open=True):
                            shown_languages = gr.CheckboxGroup(
                                choices=[c for c in COLS if c != model_col],
                                value=[c for c in COLS_LITE if c != model_col],
                                label="",
                                elem_id="column-select",
                                interactive=True,
                            )

                        # Visible table: model column plus the domains that
                        # are checked initially.
                        leaderboard_df = gr.components.Dataframe(
                            value=df[[model_col] + shown_languages.value],
                            headers=COLS,
                            datatype=TYPES,
                            elem_id="leaderboard-table",
                            interactive=False,
                        )

                        # Hidden copies of the full tables; the callbacks
                        # read from these instead of the filtered view.
                        hidden_leaderboard_df = gr.components.Dataframe(
                            value=df,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )
                        leaderboard_next = gr.components.Dataframe(
                            value=df_next,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )

                        # Re-filter the visible table when the domain
                        # selection changes.
                        shown_languages.change(
                            select_columns,
                            [hidden_leaderboard_df, shown_languages],
                            leaderboard_df,
                        )
                        # Swap the two hidden tables and refresh the visible
                        # one when the task selection changes.
                        shown_tasks.change(
                            select_tasks,
                            [hidden_leaderboard_df, shown_languages, leaderboard_next],
                            [hidden_leaderboard_df, leaderboard_df, leaderboard_next],
                        )

                        gr.Markdown(NOTES_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=10,
                elem_id="citation-button",
                show_copy_button=True,
            )

    with gr.Row():
        with gr.Accordion("🙏 Acknowledgement", open=False):
            gr.Markdown(ACKNOWLEDGEMENT_TEXT)

demo.launch()