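"""Gradio app for the LIBRA long-context leaderboard (Hugging Face Space).

Loads per-model result files from ``results/`` and renders aggregate tables
by context length and by task, plus one tab per dataset and a description tab.
"""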
import os
import json
from collections import defaultdict

import gradio as gr
import numpy as np
import pandas as pd
# Column order for the per-length tables; "dataset_total_score" is
# special-cased below and rendered as "Dataset Total Score".
LENGTHS = ["dataset_total_score", "4k", "8k", "16k", "32k", "64k", "128k"]

with open("datasets_config.json", "r") as f:
    datasets_params = json.load(f)
TASKS = list(datasets_params.keys())
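
# Assumed shape of each results/<model>.json file, inferred from the access
# patterns below (the schema is not documented in this file):
#
#   {
#       "total_score": 0.42,                # overall average, in [0, 1]
#       "<task_key>": {                     # one entry per key in datasets_config.json
#           "dataset_total_score": 0.40,    # task average over lengths, in [0, 1]
#           "4k": 0.55, "8k": 0.48, ..., "128k": 0.21
#       },
#       ...
#   }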
def make_default_md():
    leaderboard_md = """
# 🏆 LIBRA LeaderBoard

| [GitHub](https://github.com/ai-forever/LIBRA) | [Datasets](https://huggingface.co/datasets/ai-forever/LIBRA) |
"""
    return leaderboard_md
def make_model_desc_md():
    with open("docs/description.md", "r") as f:
        description = f.read()
    return description
def make_overall_table_by_tasks(files):
    """Build the per-task overview: one row per model, one column per task."""
    results = defaultdict(list)
    result_dct = {}
    for file in files:
        if not file.endswith(".json"):
            continue
        path = "results/" + file
        with open(path, "r") as f:
            data = json.load(f)
        model_name = file.split("/")[-1].split(".json")[0]
        result_dct[model_name] = {}
        for dataset in data.keys():
            if dataset == "total_score":
                result_dct[model_name][dataset] = round(data[dataset] * 100, 1)
                continue
            result_dct[model_name][dataset] = round(data[dataset]["dataset_total_score"] * 100, 1)
    for file in files:
        if not file.endswith(".json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        for key in result_dct[model_name].keys():
            if key == "total_score":
                results["Total Score"].append(result_dct[model_name][key])
            else:
                results[datasets_params[key]["name"]].append(result_dct[model_name][key])
    table = pd.DataFrame(results).sort_values(["Total Score"], ascending=False)
    # Move "Total Score" right after "Model" by name instead of relying on a
    # hard-coded column index, which breaks if the task set changes.
    cols = table.columns.tolist()
    cols.insert(1, cols.pop(cols.index("Total Score")))
    return table[cols]
def make_overall_table_by_lengths(files):
    """Build the per-length overview: scores averaged over all tasks."""
    results = defaultdict(list)
    result_dct = {}
    for file in files:
        if not file.endswith(".json"):
            continue
        path = "results/" + file
        with open(path, "r") as f:
            data = json.load(f)
        model_name = file.split("/")[-1].split(".json")[0]
        result_dct[model_name] = {}
        for dataset in data.keys():
            if dataset == "total_score":
                result_dct[model_name][dataset] = data[dataset]
                continue
            for length in data[dataset].keys():
                if length == "dataset_total_score":
                    continue
                if length not in result_dct[model_name]:
                    result_dct[model_name][length] = []
                result_dct[model_name][length].append(data[dataset][length])
    # Average each length's scores across tasks and rescale to percentages.
    for model_name in result_dct.keys():
        for length in result_dct[model_name].keys():
            result_dct[model_name][length] = round(np.mean(result_dct[model_name][length]) * 100, 1)
    for file in files:
        if not file.endswith(".json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        for key in result_dct[model_name].keys():
            if key == "total_score":
                results["Total Score"].append(result_dct[model_name][key])
            else:
                results[key].append(result_dct[model_name][key])
    table = pd.DataFrame(results).sort_values(["Total Score"], ascending=False)
    # As above, reorder by column name rather than by a fixed index.
    cols = table.columns.tolist()
    cols.insert(1, cols.pop(cols.index("Total Score")))
    return table[cols]
def load_model(files, tab_name):
    """Build a per-task table: one row per model, one column per context length."""
    results = defaultdict(list)
    for file in files:
        if not file.endswith(".json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        with open("results/" + file, "r") as f:
            result = json.load(f)
        for length in LENGTHS:
            if length in result[tab_name].keys():
                if length == "dataset_total_score":
                    results["Dataset Total Score"].append(round(result[tab_name][length] * 100, 1))
                    continue
                results[length].append(round(result[tab_name][length] * 100, 1))
            else:
                # Lengths the model was not evaluated on are shown as "-".
                results[length].append("-")
    return pd.DataFrame(results).sort_values(["Dataset Total Score"], ascending=False)
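
# Usage sketch (hypothetical file and task names):
#   load_model(["my-model.json"], "some_task_key")
# returns a one-row DataFrame with "Model", "Dataset Total Score", and one
# column per context length for that task ("-" where no score was reported).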
def build_leaderboard_tab(files):
    default_md = make_default_md()
    md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
    with gr.Tabs():
        with gr.Tab("Results by Lengths", id=0):
            df = make_overall_table_by_lengths(files)
            gr.Dataframe(
                headers=["Model"] + LENGTHS,
                # One "markdown" column for the model name, "str" for the rest.
                datatype=["markdown"] + ["str"] * len(LENGTHS),
                value=df,
                elem_id="arena_leaderboard_dataframe",
                height=700,
                wrap=True,
            )
with gr.Tab("Results by Tasks", id=1): | |
df = make_overall_table_by_tasks(files) | |
gr.Dataframe( | |
headers=[ | |
"Model", | |
] + LENGTHS, | |
datatype=[ | |
"markdown", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str", | |
"str" | |
], | |
value=df, | |
elem_id="arena_leaderboard_dataframe", | |
height=700, | |
wrap=False, | |
) | |
        # One tab per task, showing the per-length breakdown for that dataset.
        for tab_id, tab_name in enumerate(TASKS):
            df = load_model(files, tab_name)
            with gr.Tab(datasets_params[tab_name]["name"], id=tab_id + 2):
                gr.Dataframe(
                    headers=["Model"] + LENGTHS,
                    datatype=["markdown"] + ["str"] * len(LENGTHS),
                    value=df,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    wrap=True,
                )
with gr.Tab("Description", id=tab_id + 3): | |
desc_md = make_model_desc_md() | |
gr.Markdown(desc_md, elem_id="leaderboard_markdown") | |
return [md_1] | |
def build_demo(files):
    text_size = gr.themes.sizes.text_lg
    with gr.Blocks(
        title="LIBRA leaderboard",
        theme=gr.themes.Base(text_size=text_size),
    ) as demo:
        build_leaderboard_tab(files)
    return demo
if __name__ == "__main__":
    files = os.listdir("results")
    demo = build_demo(files)
    demo.launch(share=False)