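# Gradio app for an ASR leaderboard: overall WER/RTFx rankings, plus a tab
# comparing Whisper implementations across inference backends and devices.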
import gradio as gr
import pandas as pd
import json
from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
from datetime import datetime, timezone
LAST_UPDATED = "Nov 22nd 2024"
column_names = {
    "MODEL": "Model",
    "Avg. WER": "Average WER ⬇️",
    "RTFx": "RTFx ⬆️",
    "AMI WER": "AMI",
    "Earnings22 WER": "Earnings22",
    "Gigaspeech WER": "Gigaspeech",
    "LS Clean WER": "LS Clean",
    "LS Other WER": "LS Other",
    "SPGISpeech WER": "SPGISpeech",
    "Tedlium WER": "Tedlium",
    "Voxpopuli WER": "Voxpopuli",
}
whisper_column_names = {
    "MODEL": "Model",
    "Avg. WER": "Average WER ⬇️",
    "RTFx": "RTFx ⬆️",
    "Backend": "Backend",
    "Hardware": "Device",
    "AMI WER": "AMI",
    "Earnings22 WER": "Earnings22",
    "Gigaspeech WER": "Gigaspeech",
    "LS Clean WER": "LS Clean",
    "LS Other WER": "LS Other",
    "SPGISpeech WER": "SPGISpeech",
    "Tedlium WER": "Tedlium",
    "Voxpopuli WER": "Voxpopuli",
}
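# Load the eval queue repos, the list of already-requested models, and the results CSVs from the Hub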
eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results = load_all_info_from_dataset_hub()
if not csv_results.exists():
    raise Exception(f"CSV file {csv_results} does not exist locally")
if not whisper_csv_results.exists():
    raise Exception(f"CSV file {whisper_csv_results} does not exist locally")
# Load the results CSVs and format their columns
original_df = pd.read_csv(csv_results)
whisper_df = pd.read_csv(whisper_csv_results)
# Format cell values: round numbers to 2 decimals, pass strings through unchanged
def formatter(x):
    if isinstance(x, str):
        return x
    return round(x, 2)
for col in original_df.columns:
    if col == "model":
        # Render model names as clickable links (whisper_df keeps plain model names)
        original_df[col] = original_df[col].apply(make_clickable_model)
    else:
        original_df[col] = original_df[col].apply(formatter)  # For numerical values
        whisper_df[col] = whisper_df[col].apply(formatter)  # For numerical values
original_df.rename(columns=column_names, inplace=True)
original_df.sort_values(by='Average WER ⬇️', inplace=True)
whisper_df.rename(columns=whisper_column_names, inplace=True)
whisper_df.sort_values(by='Average WER ⬇️', inplace=True)
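# Display column names and datatypes for the Gradio Dataframe components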
COLS = [c.name for c in fields(AutoEvalColumn)]
TYPES = [c.type for c in fields(AutoEvalColumn)]
def request_model(model_text, chbcoco2017):
    """Validate a model request, write it to a local file, and upload it to the requests repo."""
    # Determine the selected checkboxes
    dataset_selection = []
    if chbcoco2017:
        dataset_selection.append("ESB Datasets tests only")

    if len(dataset_selection) == 0:
        return styled_error("You need to select at least one dataset")

    base_model_on_hub, error_msg = is_model_on_hub(model_text)
    if not base_model_on_hub:
        return styled_error(f"Base model '{model_text}' {error_msg}")

    # Construct the output dictionary
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    required_datasets = ', '.join(dataset_selection)
    eval_entry = {
        "date": current_time,
        "model": model_text,
        "datasets_selected": required_datasets
    }

    # Prepare the file path
    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
    fn_datasets = '@ '.join(dataset_selection)
    filename = model_text.replace("/", "@") + "@@" + fn_datasets
    if filename in requested_models:
        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")

    try:
        filename_ext = filename + ".txt"
        out_filepath = DIR_OUTPUT_REQUESTS / filename_ext

        # Write the request to a text file
        with open(out_filepath, "w") as f:
            f.write(json.dumps(eval_entry))
        upload_file(filename, out_filepath)

        # Include the file in the list of uploaded files
        requested_models.append(filename)

        # Remove the local file
        out_filepath.unlink()

        return styled_message("🤗 Your request has been submitted and will be evaluated soon!")
    except Exception:
        return styled_error("Error submitting request!")
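# Build the Gradio interface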
with gr.Blocks(css=LEADERBOARD_CSS) as demo:
    gr.HTML(BANNER, elem_id="banner")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
            leaderboard_table = gr.components.Dataframe(
                value=original_df,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
        with gr.TabItem("🔄 Whisper Model Leaderboard", elem_id="whisper-backends-tab", id=1):
            gr.Markdown("## Whisper Model Performance Across Different Backends", elem_classes="markdown-text")
            gr.Markdown("This table shows how different Whisper model implementations compare in terms of performance and speed.", elem_classes="markdown-text")

            with gr.Row():
                backend_filter = gr.Dropdown(
                    choices=["All"] + sorted(whisper_df["Backend"].unique().tolist()),
                    value=["All"],  # Multiselect dropdowns take a list of selected values
                    label="Filter by Backend",
                    elem_id="backend-filter",
                    multiselect=True  # Enable multiple selection
                )
                device_choices = ["All"] + sorted(whisper_df["Device"].unique().tolist()) if "Device" in whisper_df.columns else ["All"]
                device_filter = gr.Dropdown(
                    choices=device_choices,
                    value=["All"],  # Multiselect dropdowns take a list of selected values
                    label="Filter by Device",
                    elem_id="device-filter",
                    multiselect=True  # Enable multiple selection
                )

            whisper_table = gr.components.Dataframe(
                value=whisper_df,
                datatype=TYPES,
                elem_id="whisper-table",
                interactive=False,
                visible=True,
            )
            def filter_whisper_table(backends, devices):
                """Filter the Whisper table by the selected backends and devices."""
                filtered_df = whisper_df.copy()
                # Handle backend filtering
                if backends and "All" not in backends:
                    filtered_df = filtered_df[filtered_df["Backend"].isin(backends)]
                # Handle device filtering
                if devices and "All" not in devices and "Device" in filtered_df.columns:
                    filtered_df = filtered_df[filtered_df["Device"].isin(devices)]
                return filtered_df
            backend_filter.change(
                filter_whisper_table,
                inputs=[backend_filter, device_filter],
                outputs=whisper_table
            )
            device_filter.change(
                filter_whisper_table,
                inputs=[backend_filter, device_filter],
                outputs=whisper_table
            )
        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=2):
            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=3):
            with gr.Column():
                gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
            with gr.Column():
                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
                    chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset", visible=False, value=True, interactive=False)
                with gr.Column():
                    mdw_submission_result = gr.Markdown()
                    btn_submit = gr.Button(value="🚀 Request")
                    btn_submit.click(request_model,
                                     [model_name_textbox, chb_coco2017],
                                     mdw_submission_result)

    gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            gr.Textbox(
                value=CITATION_TEXT, lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )

demo.launch(ssr_mode=False)