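"""Gradio app for the LIBRA leaderboard.

Reads per-model result files from the `results/` directory and renders
leaderboard tables by context length, by task, and a per-task breakdown.
"""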
import os
import json
import gradio as gr
import pandas as pd
import numpy as np
from collections import defaultdict
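
# Expected shape of each results/<model>.json file (inferred from the parsing below):
# {
#     "total_score": <overall score, 0-1>,
#     "<dataset_key>": {
#         "dataset_total_score": <score, 0-1>,
#         "4k": <score>, "8k": <score>, ..., "128k": <score>
#     },
#     ...
# }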
# Column keys for per-length tables: the per-task total score first, then each context length.
LENGTHS = ["dataset_total_score", "4k", "8k", "16k", "32k", "64k", "128k"]

# Dataset configuration: maps each dataset key to its metadata (at least a display "name").
with open("datasets_config.json", "r") as f:
    datasets_params = json.load(f)
TASKS = list(datasets_params.keys())

def make_default_md():
    # Markdown header shown above the leaderboard tables.
    leaderboard_md = """
🏅 LIBRA Leaderboard
| [GitHub](https://github.com/ai-forever/LIBRA) | [Datasets](https://huggingface.co/datasets/ai-forever/LIBRA) |
"""
    return leaderboard_md

def make_model_desc_md():
    # Full benchmark description rendered in the "Description" tab.
    with open("docs/description.md", "r") as f:
        description = f.read()
    return description

def make_overall_table_by_tasks(files):
    # Build one row per model: overall score plus the total score of every task.
    results = defaultdict(list)
    result_dct = {}
    for file in files:
        if not file.endswith(".json"):
            continue
        path = "results/" + file
        with open(path, "r") as f:
            data = json.load(f)
        model_name = file.split("/")[-1].split(".json")[0]
        result_dct[model_name] = {}
        for dataset in data.keys():
            if dataset == "total_score":
                result_dct[model_name][dataset] = round(data[dataset] * 100, 1)
                continue
            result_dct[model_name][dataset] = round(data[dataset]["dataset_total_score"] * 100, 1)
    for file in files:
        if not file.endswith(".json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        for key in result_dct[model_name].keys():
            if key == "total_score":
                results["Total Score"].append(result_dct[model_name][key])
            else:
                results[datasets_params[key]["name"]].append(result_dct[model_name][key])
    table = pd.DataFrame(results).sort_values(["Total Score"], ascending=False)
    # Put "Model" and "Total Score" first, followed by the task columns.
    cols = table.columns.tolist()
    cols = ["Model", "Total Score"] + [c for c in cols if c not in ("Model", "Total Score")]
    return table[cols]

def make_overall_table_by_lengths(files):
    # Build one row per model: overall score plus the mean score at every context length.
    results = defaultdict(list)
    result_dct = {}
    for file in files:
        if not file.endswith(".json"):
            continue
        path = "results/" + file
        with open(path, "r") as f:
            data = json.load(f)
        model_name = file.split("/")[-1].split(".json")[0]
        result_dct[model_name] = {}
        for dataset in data.keys():
            if dataset == "total_score":
                result_dct[model_name][dataset] = data[dataset]
                continue
            # Collect per-length scores across all datasets.
            for length in data[dataset].keys():
                if length == "dataset_total_score":
                    continue
                if length not in result_dct[model_name]:
                    result_dct[model_name][length] = []
                result_dct[model_name][length].append(data[dataset][length])
    # Average each length over all datasets and convert to percentages
    # (np.mean of the scalar total_score leaves it unchanged).
    for model_name in result_dct.keys():
        for length in result_dct[model_name].keys():
            result_dct[model_name][length] = round(np.mean(result_dct[model_name][length]) * 100, 1)
    for file in files:
        if not file.endswith(".json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        for key in result_dct[model_name].keys():
            if key == "total_score":
                results["Total Score"].append(result_dct[model_name][key])
            else:
                results[key].append(result_dct[model_name][key])
    table = pd.DataFrame(results).sort_values(["Total Score"], ascending=False)
    # Put "Model" and "Total Score" first, followed by the length columns.
    cols = table.columns.tolist()
    cols = ["Model", "Total Score"] + [c for c in cols if c not in ("Model", "Total Score")]
    return table[cols]

def load_model(files, tab_name):
    # Per-task table: one row per model with the task total score and per-length scores.
    results = defaultdict(list)
    for file in files:
        if not file.endswith(".json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        with open("results/" + file, "r") as f:
            result = json.load(f)
        for length in LENGTHS:
            if length in result[tab_name].keys():
                if length == "dataset_total_score":
                    results["Dataset Total Score"].append(round(result[tab_name][length] * 100, 1))
                    continue
                results[length].append(round(result[tab_name][length] * 100, 1))
            else:
                # Lengths a model was not evaluated on are shown as "-".
                results[length].append("-")
    return pd.DataFrame(results).sort_values(["Dataset Total Score"], ascending=False)

def build_leaderboard_tab(files):
    default_md = make_default_md()
    md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
    with gr.Tabs() as tabs:
        # Scores averaged over tasks, split by context length.
        with gr.Tab("Results by Lengths", id=0):
            df = make_overall_table_by_lengths(files)
            gr.Dataframe(
                headers=["Model", "Total Score"] + LENGTHS[1:],
                datatype=["markdown"] + ["str"] * 7,
                value=df,
                elem_id="arena_leaderboard_dataframe",
                height=700,
                wrap=True,
            )
        # Scores per task, averaged over context lengths.
        with gr.Tab("Results by Tasks", id=1):
            df = make_overall_table_by_tasks(files)
            gr.Dataframe(
                headers=["Model", "Total Score"] + [datasets_params[task]["name"] for task in TASKS],
                datatype=["markdown"] + ["str"] * (len(TASKS) + 1),
                value=df,
                elem_id="arena_leaderboard_dataframe",
                height=700,
                wrap=False,
            )
        # One tab per task with its per-length breakdown.
        for tab_id, tab_name in enumerate(TASKS):
            df = load_model(files, tab_name)
            with gr.Tab(datasets_params[tab_name]["name"], id=tab_id + 2):
                gr.Dataframe(
                    headers=["Model", "Dataset Total Score"] + LENGTHS[1:],
                    datatype=["markdown"] + ["str"] * 7,
                    value=df,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    wrap=True,
                )
        # Benchmark description, placed after the last task tab.
        with gr.Tab("Description", id=tab_id + 3):
            desc_md = make_model_desc_md()
            gr.Markdown(desc_md, elem_id="leaderboard_markdown")
    return [md_1]

def build_demo(files):
    text_size = gr.themes.sizes.text_lg
    with gr.Blocks(
        title="LIBRA leaderboard",
        theme=gr.themes.Base(text_size=text_size),
    ) as demo:
        build_leaderboard_tab(files)
    return demo

if __name__ == "__main__":
    files = os.listdir("results")
    demo = build_demo(files)
    demo.launch(share=False)