Steven Zheng committed on
Commit
4a6c7b9
·
1 Parent(s): 6787ab4

added whisper leaderboard

Browse files
Files changed (3) hide show
  1. app.py +77 -6
  2. constants.py +5 -4
  3. init.py +16 -3
app.py CHANGED
@@ -22,14 +22,32 @@ column_names = {
22
  "Voxpopuli WER": "Voxpopuli",
23
  }
24
 
25
- eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  if not csv_results.exists():
28
  raise Exception(f"CSV file {csv_results} does not exist locally")
29
-
 
 
30
  # Get csv with data and parse columns
31
  original_df = pd.read_csv(csv_results)
32
-
33
  # Formats the columns
34
  def formatter(x):
35
  if type(x) is str:
@@ -43,9 +61,11 @@ for col in original_df.columns:
43
  original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
44
  else:
45
  original_df[col] = original_df[col].apply(formatter) # For numerical values
46
-
47
  original_df.rename(columns=column_names, inplace=True)
48
  original_df.sort_values(by='Average WER ⬇️', inplace=True)
 
 
49
 
50
  COLS = [c.name for c in fields(AutoEvalColumn)]
51
  TYPES = [c.type for c in fields(AutoEvalColumn)]
@@ -115,11 +135,62 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
115
  interactive=False,
116
  visible=True,
117
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
121
 
122
- with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=2):
123
  with gr.Column():
124
  gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
125
  with gr.Column():
 
22
  "Voxpopuli WER": "Voxpopuli",
23
  }
24
 
25
+ whisper_column_names = {
26
+ "MODEL": "Model",
27
+ "Avg. WER": "Average WER ⬇️",
28
+ "RTFx": "RTFx ⬆️️",
29
+ "Backend": "Backend",
30
+ "Hardware": "Device",
31
+ "AMI WER": "AMI",
32
+ "Earnings22 WER": "Earnings22",
33
+ "Gigaspeech WER": "Gigaspeech",
34
+ "LS Clean WER": "LS Clean",
35
+ "LS Other WER": "LS Other",
36
+ "SPGISpeech WER": "SPGISpeech",
37
+ "Tedlium WER": "Tedlium",
38
+ "Voxpopuli WER": "Voxpopuli",
39
+ }
40
+
41
+ eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results = load_all_info_from_dataset_hub()
42
 
43
  if not csv_results.exists():
44
  raise Exception(f"CSV file {csv_results} does not exist locally")
45
+ if not whisper_csv_results.exists():
46
+ raise Exception(f"CSV file {whisper_csv_results} does not exist locally")
47
+
48
  # Get csv with data and parse columns
49
  original_df = pd.read_csv(csv_results)
50
+ whisper_df = pd.read_csv(whisper_csv_results)
51
  # Formats the columns
52
  def formatter(x):
53
  if type(x) is str:
 
61
  original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
62
  else:
63
  original_df[col] = original_df[col].apply(formatter) # For numerical values
64
+ whisper_df[col] = whisper_df[col].apply(formatter) # For numerical values
65
  original_df.rename(columns=column_names, inplace=True)
66
  original_df.sort_values(by='Average WER ⬇️', inplace=True)
67
+ whisper_df.rename(columns=whisper_column_names, inplace=True)
68
+ whisper_df.sort_values(by='Average WER ⬇️', inplace=True)
69
 
70
  COLS = [c.name for c in fields(AutoEvalColumn)]
71
  TYPES = [c.type for c in fields(AutoEvalColumn)]
 
135
  interactive=False,
136
  visible=True,
137
  )
138
+ with gr.TabItem("🔄 Whisper Model Leaderboard", elem_id="whisper-backends-tab", id=1):
139
+ gr.Markdown("## Whisper Model Performance Across Different Backends", elem_classes="markdown-text")
140
+ gr.Markdown("This table shows how different Whisper model implementations compare in terms of performance and speed.", elem_classes="markdown-text")
141
+
142
+ with gr.Row():
143
+ backend_filter = gr.Dropdown(
144
+ choices=["All"] + sorted(whisper_df["Backend"].unique().tolist()),
145
+ value="All",
146
+ label="Filter by Backend",
147
+ elem_id="backend-filter",
148
+ multiselect=True # Enable multiple selection
149
+ )
150
+ device_choices = ["All"] + sorted(whisper_df["Device"].unique().tolist()) if "Device" in whisper_df.columns else ["All"]
151
+ device_filter = gr.Dropdown(
152
+ choices=device_choices,
153
+ value="All",
154
+ label="Filter by Device",
155
+ elem_id="device-filter",
156
+ multiselect=True # Enable multiple selection
157
+ )
158
+
159
+ whisper_table = gr.components.Dataframe(
160
+ value=whisper_df,
161
+ datatype=TYPES,
162
+ elem_id="whisper-table",
163
+ interactive=False,
164
+ visible=True,
165
+ )
166
 
167
+ def filter_whisper_table(backends, devices):
168
+ filtered_df = whisper_df.copy()
169
+
170
+ # Handle backend filtering
171
+ if backends and "All" not in backends:
172
+ filtered_df = filtered_df[filtered_df["Backend"].isin(backends)]
173
+
174
+ # Handle device filtering
175
+ if devices and "All" not in devices and "Device" in filtered_df.columns:
176
+ filtered_df = filtered_df[filtered_df["Device"].isin(devices)]
177
+
178
+ return filtered_df
179
+
180
+ backend_filter.change(
181
+ filter_whisper_table,
182
+ inputs=[backend_filter, device_filter],
183
+ outputs=whisper_table
184
+ )
185
+ device_filter.change(
186
+ filter_whisper_table,
187
+ inputs=[backend_filter, device_filter],
188
+ outputs=whisper_table
189
+ )
190
+ with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=2):
191
  gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
192
 
193
+ with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=3):
194
  with gr.Column():
195
  gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
196
  with gr.Column():
constants.py CHANGED
@@ -116,8 +116,9 @@ For more details on the individual datasets and how models are evaluated to give
116
 
117
  LEADERBOARD_CSS = """
118
  #leaderboard-table th .header-content {
119
- white-space: nowrap;
120
- overflow: hidden;
121
- text-overflow: ellipsis;
122
- }
 
123
  """
 
116
 
117
  LEADERBOARD_CSS = """
118
  #leaderboard-table th .header-content {
119
+ white-space: nowrap;
120
+ }
121
+ #whisper-backends-tab th .header-content {
122
+ white-space: nowrap;
123
+ }
124
  """
init.py CHANGED
@@ -4,8 +4,10 @@ from pathlib import Path
4
  from huggingface_hub import HfApi, Repository
5
 
6
  TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
7
- QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
8
- QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
 
 
9
 
10
  hf_api = HfApi(
11
  endpoint="https://huggingface.co",
@@ -29,6 +31,14 @@ def load_all_info_from_dataset_hub():
29
  repo_type="dataset",
30
  )
31
  eval_queue_repo.git_pull()
 
 
 
 
 
 
 
 
32
 
33
  # Local directory where dataset repo is cloned + folder with eval requests
34
  directory = QUEUE_PATH / EVAL_REQUESTS_PATH
@@ -38,10 +48,13 @@ def load_all_info_from_dataset_hub():
38
  csv_results = get_csv_with_results(QUEUE_PATH)
39
  if csv_results is None:
40
  passed = False
 
 
 
41
  if not passed:
42
  raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
43
 
44
- return eval_queue_repo, requested_models, csv_results
45
 
46
 
47
  def upload_file(requested_model_name, path_or_fileobj):
 
4
  from huggingface_hub import HfApi, Repository
5
 
6
  TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
7
+ QUEUE_REPO = os.environ.get("QUEUE_REPO", "hf-audio/leaderboard-evals")
8
+ QUEUE_REPO_WHISPER = os.environ.get("QUEUE_REPO_WHISPER", "Steveeeeeeen/whisper-leaderboard-evals")
9
+ QUEUE_PATH = os.environ.get("QUEUE_PATH", "results")
10
+ QUEUE_PATH_WHISPER = os.environ.get("QUEUE_PATH_WHISPER", "whisper-results")
11
 
12
  hf_api = HfApi(
13
  endpoint="https://huggingface.co",
 
31
  repo_type="dataset",
32
  )
33
  eval_queue_repo.git_pull()
34
+
35
+ whisper_eval_queue_repo = Repository(
36
+ local_dir=QUEUE_PATH_WHISPER,
37
+ clone_from=QUEUE_REPO_WHISPER,
38
+ use_auth_token=TOKEN_HUB,
39
+ repo_type="dataset",
40
+ )
41
+ whisper_eval_queue_repo.git_pull()
42
 
43
  # Local directory where dataset repo is cloned + folder with eval requests
44
  directory = QUEUE_PATH / EVAL_REQUESTS_PATH
 
48
  csv_results = get_csv_with_results(QUEUE_PATH)
49
  if csv_results is None:
50
  passed = False
51
+ whisper_csv_results = get_csv_with_results(QUEUE_PATH_WHISPER)
52
+ if whisper_csv_results is None:
53
+ passed = False
54
  if not passed:
55
  raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
56
 
57
+ return eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results
58
 
59
 
60
  def upload_file(requested_model_name, path_or_fileobj):