Spaces:
Running
Running
# gradio display leaderboard | |
import pandas as pd | |
import numpy as np | |
import matplotlib | |
# matplotlib.use('macosx') | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import plotly.graph_objects as go | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from texts import * | |
from leaderboards import eng_leaderboards, chi_leaderboards, dataset_abbr_en_dict, dataset_abbr_zh_dict | |
import toml | |
import os | |
from latex_utils import gen_latex_table | |
config = toml.load("config.toml") | |
def create_lang_tabs(lang, lang_cates):
    """Load every leaderboard CSV belonging to one language.

    ``lang_cates`` is iterated as ``(dataset, categories)`` pairs.  For each
    category the file ``./data_v2/{dataset}_{lang}_{cat}_gen.csv`` is read
    with ``pd.read_csv``.

    Returns:
        A nested mapping ``{dataset: {category: DataFrame}}``.
    """
    return {
        dataset: {
            cat: pd.read_csv(f'./data_v2/{dataset}_{lang}_{cat}_gen.csv')
            for cat in cates
        }
        for dataset, cates in lang_cates
    }
# Raw leaderboard frames keyed by the language name shown in the UI
# ('English' / 'Chinese'); each value is the {dataset: {category: DataFrame}}
# mapping returned by create_lang_tabs for that language's file-name code.
dict_lang = {
    display_name: create_lang_tabs(lang_code, boards)
    for display_name, lang_code, boards in (
        ('English', 'en', eng_leaderboards),
        ('Chinese', 'zh', chi_leaderboards),
    )
}
def process_mc_df(df, shot=None):
    """Turn a raw multiple-choice score table into a ranked leaderboard frame.

    Args:
        df: Frame with a ``name`` column plus any subset of the eight score
            columns listed in ``col_to_multi_index`` below.
        shot: ``"Zeroshot"`` or ``"Fewshot"`` to keep only that group of
            columns (the result then has flat column names); a falsy value
            keeps both groups under two-level column labels.

    Returns:
        A new frame with ``Model`` as the first column, the score columns
        coerced to numbers and rounded to two decimals, a ``BestScore``
        column holding each row's maximum, rows sorted by ``BestScore``
        descending, and every missing value replaced by ``'/'``.

    Raises:
        KeyError: if ``df`` has no ``name`` column, contains a score column
            that is not in ``col_to_multi_index``, or lacks the requested
            ``shot`` group.
    """
    # Flat CSV column name -> (shot setting, prompting method).
    col_to_multi_index = {
        "zero_naive": ("Zeroshot", "Naive"),
        "zero_self_con": ("Zeroshot", "SC"),
        "zero_cot": ("Zeroshot", "CoT"),
        "zero_cot_self_con": ("Zeroshot", "CoT+SC"),
        "few_naive": ("Fewshot", "Naive"),
        "few_self_con": ("Fewshot", "SC"),
        "few_cot": ("Fewshot", "CoT"),
        "few_cot_self_con": ("Fewshot", "CoT+SC"),
    }

    # Rename the "name" column to "Model" and use it as the index so that
    # only score columns remain in df.columns.
    df = df.rename(columns={"name": "Model"}).set_index("Model")

    # Report unexpected columns by name instead of a bare KeyError('<col>').
    unknown = [col for col in df.columns if col not in col_to_multi_index]
    if unknown:
        raise KeyError(
            f"Unexpected score columns {unknown}; "
            f"expected a subset of {list(col_to_multi_index)}"
        )

    # Regroup the flat columns under a two-level (shot, method) header.
    try:
        df.columns = pd.MultiIndex.from_tuples(
            [col_to_multi_index[col] for col in df.columns]
        )
    except Exception:
        # Dump the offending frame to aid debugging, then propagate.
        print(df)
        raise

    # Keep only the requested shot group, e.g. shot="Zeroshot" keeps only
    # the Zeroshot columns.
    if shot:
        df = df[shot]

    # Convert every score to a number; values that fail to parse become NaN.
    df = df.apply(pd.to_numeric, errors="coerce")
    # Keep two decimal places.
    df = df.round(2)
    # Add each row's best score and rank by it.
    df["BestScore"] = df.max(axis=1)
    df = df.sort_values(by="BestScore", ascending=False)
    # Bring "Model" back as a regular column.
    df = df.reset_index()
    # Show missing values as '/'.
    df = df.fillna('/')
    return df
def process_qa_df(df):
    """Return the question-answering table rounded to four decimal places."""
    rounded = df.round(decimals=4)
    return rounded
def dataframe_to_gradio(df, is_mc=True, shot=None):
    """Wrap a leaderboard table in a Gradio ``Dataframe`` component.

    Multiple-choice tables (``is_mc`` true) go through ``process_mc_df`` with
    the given ``shot``; all other tables go through ``process_qa_df``.  The
    processed rows become the component's value and the processed column
    labels become its headers.
    """
    table = process_mc_df(df, shot) if is_mc else process_qa_df(df)
    column_labels = list(table.columns)
    return gr.components.Dataframe(
        value=table.values.tolist(),
        headers=column_labels,
    )
def plot_radar_chart(df, attributes):
    """Draw one filled polar trace per row of ``df``.

    Each row's ``Model`` value names the trace and its values under
    ``attributes`` supply the radii; ``attributes`` itself labels the
    angular axis.  Returns the Plotly figure.
    """
    fig = go.Figure()

    for _, record in df.iterrows():
        model_name = record['Model']
        radii = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=radii,
            theta=attributes,
            fill='toself',
            name=model_name,
        )
        fig.add_trace(trace)

    # Fixed radial range so every chart shares the same scale.
    radial_axis = dict(visible=True, range=[0, 0.9])
    fig.update_layout(
        title="OpsEval",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return fig
def pop_latex_table(caption, label, lang, dataset, cat):
    """Click handler: render the cached table as LaTeX in a visible textbox.

    Looks up ``global_df[lang][dataset][cat]``, passes it with ``caption`` and
    ``label`` to ``gen_latex_table``, and returns a visible ``gr.Textbox``
    holding the result.
    """
    source_df = global_df[lang][dataset][cat]
    latex_code = gen_latex_table(caption, label, source_df)
    return gr.Textbox(latex_code, label="LaTeX Table", visible=True)
def generate_csv(lang, dataset, cat, shot, chosen_dataset):
    """Click handler: write the cached table to a CSV and expose it for download.

    Args:
        lang, dataset, cat: keys into ``global_df`` selecting the frame.
        shot: tag that only appears in the output file name.
        chosen_dataset: display name used as the file-name prefix.

    Returns:
        A visible ``gr.File`` pointing at the written CSV, which is created
        in the current working directory.

    Raises:
        KeyError: if ``global_df[lang][dataset][cat]`` does not exist.
    """
    df = global_df[lang][dataset][cat]

    def _safe(part):
        # These values are submitted by the client (they come from Text
        # components wired in as event inputs), so path separators are
        # neutralised to keep the file inside the working directory.
        return str(part).replace("/", "_").replace("\\", "_")

    filename = f"{_safe(chosen_dataset)}_{_safe(lang)}_{_safe(cat)}_{_safe(shot)}.csv"
    df.to_csv(filename, index=False)
    return gr.File(label="Download Link", type="filepath", value=filename,
                   visible=True)
global_df = {} | |
def _build_export_controls(caption, label, lang, dataset, cat, shot, dataset_name):
    """Create the hidden parameter fields and export buttons for one table tab.

    Components are created in a fixed order: the two hidden output widgets,
    then the hidden parameter fields, then a row with the two buttons.

    Returns:
        ``(latex_button, latex_textbox, csv_button, download_link, params)``
        where ``params`` maps parameter names to the hidden ``gr.Text``
        components that carry them to the click handlers.
    """
    # Hidden outputs that the export handlers replace with visible ones.
    latex_textbox = gr.Textbox(label="LaTeX Table", visible=False)
    download_link = gr.File(label="Download Link", type="filepath",
                            visible=False)
    # Hidden fields used purely to pass constants to the handlers.
    params = {
        "caption": gr.Text(value=caption, visible=False),
        "label": gr.Text(value=label, visible=False),
        "lang": gr.Text(value=lang, visible=False),
        "dataset": gr.Text(value=dataset, visible=False),
        "cat": gr.Text(value=cat, visible=False),
        "shot": gr.Text(value=shot, visible=False),
        "dataset_name": gr.Text(value=dataset_name, visible=False),
    }
    with gr.Row():
        latex_button = gr.Button("Export LaTeX Table", variant="primary")
        csv_button = gr.Button("Export CSV", variant="primary")
    return latex_button, latex_textbox, csv_button, download_link, params


def create_lang_leader_board(lang_dict, lang, dis_lang='en'):
    """Build one tab per dataset, each holding its tables and export controls.

    Args:
        lang_dict: mapping ``{dataset: {category: DataFrame}}``.
        lang: key under which untouched copies of the frames are cached in
            ``global_df`` (first call wins); also shown in LaTeX captions.
        dis_lang: ``"en"`` takes tab titles from ``dataset_abbr_en_dict``;
            any other value uses ``dataset_abbr_zh_dict``.

    Returns:
        ``(tab_list, latex_btn_list, latex_textbox_list, csv_btn_list,
        csv_textbox_list, export_params)``.  ``tab_list`` has one entry per
        dataset; the other five lists are parallel, one entry per table tab.
    """
    # Question-answering tables have no shot setting.  The hidden "shot"
    # field used to receive whatever the preceding multiple-choice loop left
    # behind ('Fewshot'), and raised UnboundLocalError when no such loop had
    # run.  The same value is kept so CSV download file names do not change.
    qa_shot = 'Fewshot'

    chosen_dict = dataset_abbr_en_dict if dis_lang == "en" else dataset_abbr_zh_dict

    tab_list = []
    latex_btn_list = []
    latex_textbox_list = []
    csv_btn_list = []
    csv_textbox_list = []
    export_params = []

    def _collect(controls):
        latex_button, latex_textbox, csv_button, download_link, params = controls
        latex_btn_list.append(latex_button)
        latex_textbox_list.append(latex_textbox)
        csv_btn_list.append(csv_button)
        csv_textbox_list.append(download_link)
        export_params.append(params)

    lang_cache = global_df.setdefault(lang, {})

    for dataset, value in lang_dict.items():
        dataset_cache = lang_cache.setdefault(dataset, {})
        display_name = chosen_dict[dataset]
        with gr.Tab(display_name) as tab:
            for cat, df in value.items():
                # Cache an untouched copy for the export handlers.
                if cat not in dataset_cache:
                    dataset_cache[cat] = df.copy(deep=True)

                if cat == 'mc':
                    for shot in ('Zeroshot', 'Fewshot'):
                        with gr.Tab(f'Multiple Choice Question ({shot})'):
                            dataframe_to_gradio(df, is_mc=True, shot=shot)
                            _collect(_build_export_controls(
                                caption=f"{display_name} Multiple Choice Question ({shot}, {lang}) Leaderboard",
                                label=f"tab:{dataset}_{shot}_{lang}",
                                lang=lang,
                                dataset=dataset,
                                cat=cat,
                                shot=shot,
                                dataset_name=display_name,
                            ))
                else:
                    with gr.Tab('Question Answering'):
                        dataframe_to_gradio(df, is_mc=False)
                        _collect(_build_export_controls(
                            caption=f"{display_name} Question Answering ({lang}) Leaderboard",
                            label=f"tab:{dataset}_{lang}",
                            lang=lang,
                            dataset=dataset,
                            cat=cat,
                            shot=qa_shot,
                            dataset_name=display_name,
                        ))
        tab_list.append(tab)

    return tab_list, latex_btn_list, latex_textbox_list, csv_btn_list, csv_textbox_list, export_params
def get_latest_modification_date():
    """Return the newest CSV modification time in the configured dataset dir.

    Scans ``config['dataset']['dataset_dir']`` for entries ending in ``.csv``
    and formats the largest modification timestamp as
    ``YYYY-MM-DD HH:MM:SS``.  With no CSV present the timestamp is 0.
    """
    data_dir = config['dataset']['dataset_dir']
    mtimes = [
        os.path.getmtime(os.path.join(data_dir, entry))
        for entry in os.listdir(data_dir)
        if entry.endswith('.csv')
    ]
    # Seed with 0 so the result is defined when there are no CSV files.
    newest = max([0, *mtimes])
    return pd.to_datetime(newest, unit='s').strftime("%Y-%m-%d %H:%M:%S")
# UI strings per display language: introduction text, page title and the
# leaderboard section header (which embeds the latest data update time,
# computed once when this module is loaded).
translation_dict = dict(
    zh=dict(
        intro=ZH_INTRODUCTION_TEXT,
        title=ZH_TITLE,
        lb_sec=f"""# 🏅 排行榜 \n 更新时间: {get_latest_modification_date()}\n""",
    ),
    en=dict(
        intro=INTRODUCTION_TEXT,
        title=TITLE,
        lb_sec=f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
    ),
)
def get_language_lb(language):
    """Build the leaderboard components for every entry of ``dict_lang``.

    ``language`` is forwarded as the display language.  Returns a mapping
    from language name to the 6-tuple produced by
    ``create_lang_leader_board``.
    """
    tab_dict = dict.fromkeys(('English', 'Chinese'))
    for lang_name, lang_frames in dict_lang.items():
        tab_dict[lang_name] = tuple(
            create_lang_leader_board(lang_frames, lang_name, language)
        )
    return tab_dict
def switch_language(language):
    """Click handler for the language buttons.

    Rebuilds the leaderboard tabs for ``language`` and returns, in order: the
    translated title, introduction and leaderboard header, every dataset tab
    (English tabs first, then Chinese), and finally ``language`` itself.
    """
    boards = get_language_lb(language)
    texts = translation_dict[language]
    dataset_tabs = list(boards['English'][0]) + list(boards['Chinese'][0])
    return (texts['title'], texts['intro'], texts['lb_sec'], *dataset_tabs, language)
def get_lb_body(language='en'):
    """Create the leaderboard body: one outer tab per entry of ``dict_lang``.

    Returns:
        ``(body, tab_dict)`` where ``body`` is the ``gr.Blocks`` holding the
        tabs and ``tab_dict`` maps each language name to the 6-tuple returned
        by ``create_lang_leader_board``.
    """
    tab_dict = dict.fromkeys(('English', 'Chinese'))
    with gr.Blocks() as body:
        for lang_name, lang_frames in dict_lang.items():
            with gr.Tab(lang_name):
                tab_dict[lang_name] = tuple(
                    create_lang_leader_board(lang_frames, lang_name, language)
                )
    return body, tab_dict
def launch_gradio():
    """Assemble the whole page and return the (not yet launched) ``gr.Blocks``.

    The page contains the two language buttons, the title, introduction and
    leaderboard header, and the leaderboard body.  Every export button is
    wired to ``pop_latex_table`` / ``generate_csv`` and both language buttons
    to ``switch_language``.
    """
    demo = gr.Blocks()

    with demo:
        lang_state = gr.State("en")
        with gr.Row():
            en_button = gr.Button("English", variant="primary")
            zh_button = gr.Button("中文", variant="primary")
        title = gr.HTML(TITLE)
        intro = gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        leaderboard_section = gr.Markdown(
            f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
            elem_classes="markdown-text",
        )
        _, tab_dict = get_lb_body(language=lang_state.value)

        # Hook each export button to its handler; the hidden Text components
        # in export_params supply the handler arguments.
        latex_keys = ("caption", "label", "lang", "dataset", "cat")
        csv_keys = ("lang", "dataset", "cat", "shot", "dataset_name")
        for lang_name in ('English', 'Chinese'):
            (_, latex_buttons, latex_boxes,
             csv_buttons, csv_files, export_params) = tab_dict[lang_name]
            for button, params, target in zip(latex_buttons, export_params, latex_boxes):
                button.click(
                    pop_latex_table,
                    inputs=[params[key] for key in latex_keys],
                    outputs=[target],
                )
            for button, params, target in zip(csv_buttons, export_params, csv_files):
                button.click(
                    generate_csv,
                    inputs=[params[key] for key in csv_keys],
                    outputs=[target],
                )

        # Language switch: refreshes the texts, every dataset tab and the state.
        all_tabs = [*tab_dict['English'][0], *tab_dict['Chinese'][0]]
        for button, lang_code in ((en_button, "en"), (zh_button, "zh")):
            button.click(
                switch_language,
                inputs=[gr.State(lang_code)],
                outputs=[title, intro, leaderboard_section, *all_tabs, lang_state],
                postprocess=False,
            )

    return demo
# Render floats with two decimals whenever pandas formats them as text.
pd.options.display.float_format = '{:.02f}'.format

# Call launch_gradio once per hour in a background thread.
# NOTE(review): the Blocks object returned by the scheduled call is not used
# anywhere here — confirm this job actually refreshes the served page.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, trigger='interval', hours=1)
scheduler.start()

# Build the page and start serving it.
demo = launch_gradio()
demo.launch()