Spaces:

hf-audio
/

open_asr_leaderboard

Running on CPU Upgrade

App Files Files Community

whisper-leaderboard

#31

by Steveeeeeeen HF staff - opened 1 day ago

base: refs/heads/main

←

from: refs/pr/31

Discussion Files changed

+110

-7

Files changed (2) hide show

app.py +96 -6
init.py +14 -1

app.py CHANGED Viewed

@@ -22,14 +22,32 @@ column_names = {
     "Voxpopuli WER": "Voxpopuli",
 }
-eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
 if not csv_results.exists():
     raise Exception(f"CSV file {csv_results} does not exist locally")
 # Get csv with data and parse columns
 original_df = pd.read_csv(csv_results)
 # Formats the columns
 def formatter(x):
     if type(x) is str:
@@ -43,9 +61,31 @@ for col in original_df.columns:
         original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
     else:
         original_df[col] = original_df[col].apply(formatter) # For numerical values
 original_df.rename(columns=column_names, inplace=True)
 original_df.sort_values(by='Average WER ⬇️', inplace=True)
 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
@@ -115,11 +155,58 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
                 interactive=False,
                 visible=True,
                 )
-        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
             gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=2):
             with gr.Column():
                 gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
             with gr.Column():
@@ -133,6 +220,9 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
                     btn_submitt.click(request_model,
                                       [model_name_textbox, chb_coco2017],
                                       mdw_submission_result)
     gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

     "Voxpopuli WER": "Voxpopuli",
 }
+whisper_column_names = {
+    "MODEL": "Model",
+    "Avg. WER": "Average WER ⬇️",
+    "RTFx": "RTFx ⬆️️",
+    "Backend": "Backend",
+    "Hardware": "Device",
+    "AMI WER": "AMI",
+    "Earnings22 WER": "Earnings22",
+    "Gigaspeech WER": "Gigaspeech",
+    "LS Clean WER": "LS Clean",
+    "LS Other WER": "LS Other",
+    "SPGISpeech WER": "SPGISpeech",
+    "Tedlium WER": "Tedlium",
+    "Voxpopuli WER": "Voxpopuli",
+}
+eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results = load_all_info_from_dataset_hub()
 if not csv_results.exists():
     raise Exception(f"CSV file {csv_results} does not exist locally")
+if not whisper_csv_results.exists():
+    raise Exception(f"CSV file {whisper_csv_results} does not exist locally")
 # Get csv with data and parse columns
 original_df = pd.read_csv(csv_results)
+whisper_df = pd.read_csv(whisper_csv_results)
 # Formats the columns
 def formatter(x):
     if type(x) is str:
         original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
     else:
         original_df[col] = original_df[col].apply(formatter) # For numerical values
+def filter_whisper_table(backends, device):
+    filtered_df = whisper_df.copy()
+    # Handle backend filtering
+    if backends and "All" not in backends:
+        filtered_df = filtered_df[filtered_df["Backend"].isin(backends)]
+    # Handle device filtering
+    if device != "All" and "Device" in filtered_df.columns:
+        filtered_df = filtered_df[filtered_df["Device"] == device]
+    return filtered_df
+# Add clickable links for whisper models too
+for col in whisper_df.columns:
+    if col == "model":
+        whisper_df[col] = whisper_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
+    else:
+        whisper_df[col] = whisper_df[col].apply(formatter)
 original_df.rename(columns=column_names, inplace=True)
 original_df.sort_values(by='Average WER ⬇️', inplace=True)
+whisper_df.rename(columns=whisper_column_names, inplace=True)
+whisper_df.sort_values(by='Average WER ⬇️', inplace=True)
 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
                 interactive=False,
                 visible=True,
                 )
+        with gr.TabItem("🔄 Whisper Model Leaderboard", elem_id="whisper-backends-tab", id=1):
+            gr.Markdown("## Whisper Model Performance Across Different Backends", elem_classes="markdown-text")
+            gr.Markdown("This table shows how different Whisper model implementations compare in terms of performance and speed.", elem_classes="markdown-text")
+            gr.Markdown(
+                    """
+                    * For CUDA tests, we used an **NVIDIA A100-SXM4-40GB GPU** with **CUDA 12.6** with a batch size of 64.
+                    * For Metal tests, we used a **macOS ARM64 192GB 76-core Mac Studio M2-Ultra** with **macOS 15** with a batch size of 1.
+                    """,
+                    elem_classes="markdown-text"
+                )
+            with gr.Row():
+                backend_filter = gr.Dropdown(
+                    choices=["All"] + sorted(whisper_df["Backend"].unique().tolist()),
+                    value="All",
+                    label="Filter by Backend",
+                    elem_id="backend-filter",
+                    multiselect=True,
+                )
+                device_choices = ["All"] + sorted(whisper_df["Device"].unique().tolist()) if "Device" in whisper_df.columns else ["All"]
+                device_filter = gr.Dropdown(
+                    choices=device_choices,
+                    value="All",
+                    label="Filter by Device",
+                    elem_id="device-filter",
+                    multiselect=False,
+                    allow_custom_value=False
+                )
+            whisper_table = gr.components.Dataframe(
+                value=whisper_df,
+                datatype=TYPES,
+                elem_id="whisper-table",
+                interactive=False,
+                visible=True,
+            )
+            backend_filter.change(
+                filter_whisper_table,
+                inputs=[backend_filter, device_filter],
+                outputs=whisper_table
+            )
+            device_filter.change(
+                filter_whisper_table,
+                inputs=[backend_filter, device_filter],
+                outputs=whisper_table
+            )
+        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=2):
             gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=3):
             with gr.Column():
                 gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
             with gr.Column():
                     btn_submitt.click(request_model,
                                       [model_name_textbox, chb_coco2017],
                                       mdw_submission_result)
+        # add an about section
+        with gr.TabItem("🤗 About", elem_id="od-benchmark-tab-table", id=4):
+            gr.Markdown("## About", elem_classes="markdown-text")
     gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

init.py CHANGED Viewed

@@ -5,7 +5,9 @@ from huggingface_hub import HfApi, Repository
 TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
 QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
 QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
 hf_api = HfApi(
     endpoint="https://huggingface.co",
@@ -29,6 +31,14 @@ def load_all_info_from_dataset_hub():
             repo_type="dataset",
         )
         eval_queue_repo.git_pull()
         # Local directory where dataset repo is cloned + folder with eval requests
         directory = QUEUE_PATH / EVAL_REQUESTS_PATH
@@ -38,10 +48,13 @@ def load_all_info_from_dataset_hub():
         csv_results = get_csv_with_results(QUEUE_PATH)
         if csv_results is None:
             passed = False
     if not passed:
         raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
-    return eval_queue_repo, requested_models, csv_results
 def upload_file(requested_model_name, path_or_fileobj):

 TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
 QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
+QUEUE_REPO_WHISPER = os.environ.get("QUEUE_REPO_WHISPER", None)
 QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
+QUEUE_PATH_WHISPER = os.environ.get("QUEUE_PATH_WHISPER", None)
 hf_api = HfApi(
     endpoint="https://huggingface.co",
             repo_type="dataset",
         )
         eval_queue_repo.git_pull()
+        whisper_eval_queue_repo = Repository(
+            local_dir=QUEUE_PATH_WHISPER,
+            clone_from=QUEUE_REPO_WHISPER,
+            use_auth_token=TOKEN_HUB,
+            repo_type="dataset",
+        )
+        whisper_eval_queue_repo.git_pull()
         # Local directory where dataset repo is cloned + folder with eval requests
         directory = QUEUE_PATH / EVAL_REQUESTS_PATH
         csv_results = get_csv_with_results(QUEUE_PATH)
         if csv_results is None:
             passed = False
+        whisper_csv_results = get_csv_with_results(QUEUE_PATH_WHISPER)
+        if whisper_csv_results is None:
+            passed = False
     if not passed:
         raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
+    return eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results
 def upload_file(requested_model_name, path_or_fileobj):