rjzevallos committed on
Commit
a6be7d3
1 Parent(s): d165e3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -124
app.py CHANGED
@@ -134,69 +134,8 @@ scheduler = CommitScheduler(
134
  allow_patterns=DB_NAME,
135
  )
136
 
137
- # Load audio dataset
138
- # audio_dataset = load_dataset(AUDIO_DATASET_ID)
139
 
140
- ####################################
141
- # Router API
142
- ####################################
143
- router = Client("TTS-AGI/tts-router", hf_token=os.getenv('HF_TOKEN'))
144
- ####################################
145
- # Gradio app
146
- ####################################
147
- MUST_BE_LOGGEDIN = "Please login with Hugging Face to participate in the TTS Arena."
148
- DESCR = """
149
- # TTS Arena: Benchmarking TTS Models in the Wild
150
- Vote to help the community find the best available text-to-speech model!
151
- """.strip()
152
- # INSTR = """
153
- # ## Instructions
154
-
155
- # * Listen to two anonymous models
156
- # * Vote on which synthesized audio sounds more natural to you
157
- # * If there's a tie, click Skip
158
-
159
- # **When you're ready to begin, login and begin voting!** The model names will be revealed once you vote.
160
- # """.strip()
161
- INSTR = """
162
- ## 🗳️ Vote
163
- * Input text (English only) to synthesize audio (or press 🎲 for random text).
164
- * Listen to the two audio clips, one after the other.
165
- * Vote on which audio sounds more natural to you.
166
- * _Note: Model names are revealed after the vote is cast._
167
- Note: It may take up to 30 seconds to synthesize audio.
168
- """.strip()
169
- request = ''
170
- if SPACE_ID:
171
- request = f"""
172
- ### Request a model
173
- Please [create a Discussion](https://huggingface.co/spaces/{SPACE_ID}/discussions/new) to request a model.
174
- """
175
- ABOUT = f"""
176
- ## 📄 About
177
- The TTS Arena evaluates leading speech synthesis models. It is inspired by LMsys's [Chatbot Arena](https://chat.lmsys.org/).
178
- ### Motivation
179
- The field of speech synthesis has long lacked an accurate method to measure the quality of different models. Objective metrics like WER (word error rate) are unreliable measures of model quality, and subjective measures such as MOS (mean opinion score) are typically small-scale experiments conducted with few listeners. As a result, these measurements are generally not useful for comparing two models of roughly similar quality. To address these drawbacks, we are inviting the community to rank models in an easy-to-use interface, and opening it up to the public in order to make both the opportunity to rank models, as well as the results, more easily accessible to everyone.
180
- ### The Arena
181
- The leaderboard allows a user to enter text, which will be synthesized by two models. After listening to each sample, the user can vote on which model sounds more natural. Due to the risks of human bias and abuse, model names are revealed only after a vote is submitted.
182
- ### Credits
183
- Thank you to the following individuals who helped make this project possible:
184
- * VB ([Twitter](https://twitter.com/reach_vb) / [Hugging Face](https://huggingface.co/reach-vb))
185
- * Clémentine Fourrier ([Twitter](https://twitter.com/clefourrier) / [Hugging Face](https://huggingface.co/clefourrier))
186
- * Lucain Pouget ([Twitter](https://twitter.com/Wauplin) / [Hugging Face](https://huggingface.co/Wauplin))
187
- * Yoach Lacombe ([Twitter](https://twitter.com/yoachlacombe) / [Hugging Face](https://huggingface.co/ylacombe))
188
- * Main Horse ([Twitter](https://twitter.com/main_horse) / [Hugging Face](https://huggingface.co/main-horse))
189
- * Sanchit Gandhi ([Twitter](https://twitter.com/sanchitgandhi99) / [Hugging Face](https://huggingface.co/sanchit-gandhi))
190
- * Apolinário Passos ([Twitter](https://twitter.com/multimodalart) / [Hugging Face](https://huggingface.co/multimodalart))
191
- * Pedro Cuenca ([Twitter](https://twitter.com/pcuenq) / [Hugging Face](https://huggingface.co/pcuenq))
192
- {request}
193
- ### Privacy statement
194
- We may store text you enter and generated audio. We store a unique ID for each session. You agree that we may collect, share, and/or publish any data you input for research and/or commercial purposes.
195
- ### License
196
- Generated audio clips cannot be redistributed and may be used for personal, non-commercial use only.
197
- Random sentences are sourced from a filtered subset of the [Harvard Sentences](https://www.cs.columbia.edu/~hgs/audio/harvard.html).
198
- """.strip()
199
- LDESC = """
200
  ## 🏆 Leaderboard
201
  Vote to help the community determine the best text-to-speech (TTS) models.
202
  The leaderboard displays models in descending order of how natural they sound (based on votes cast by the community).
@@ -205,12 +144,6 @@ Important: In order to help keep results fair, the leaderboard hides results by
205
 
206
 
207
 
208
-
209
- # def reload_audio_dataset():
210
- # global audio_dataset
211
- # audio_dataset = load_dataset(AUDIO_DATASET_ID)
212
- # return 'Reload Audio Dataset'
213
-
214
  def del_db(txt):
215
  if not txt.lower() == 'delete db':
216
  raise gr.Error('You did not enter "delete db"')
@@ -298,17 +231,7 @@ model_links = {
298
  'speecht5': 'https://github.com/microsoft/SpeechT5',
299
  'metavoice': 'https://github.com/metavoiceio/metavoice-src',
300
  }
301
- # def get_random_split(existing_split=None):
302
- # choice = random.choice(list(audio_dataset.keys()))
303
- # if existing_split and choice == existing_split:
304
- # return get_random_split(choice)
305
- # else:
306
- # return choice
307
-
308
- # def get_random_splits():
309
- # choice1 = get_random_split()
310
- # choice2 = get_random_split(choice1)
311
- # return (choice1, choice2)
312
  def model_license(name):
313
  print(name)
314
  for k, v in AVAILABLE_MODELS.items():
@@ -317,6 +240,8 @@ def model_license(name):
317
  return model_licenses[v]
318
  print('---')
319
  return 'Unknown'
 
 
320
  def get_leaderboard(reveal_prelim = False):
321
  conn = get_db()
322
  cursor = conn.cursor()
@@ -350,10 +275,13 @@ def get_leaderboard(reveal_prelim = False):
350
  # df = df[['order', 'name', 'score', 'license', 'votes']]
351
  df = df[['order', 'name', 'score', 'votes']]
352
  return df
 
 
353
  def mkuuid(uid):
354
  if not uid:
355
  uid = uuid.uuid4()
356
  return uid
 
357
  def upvote_model(model, uname):
358
  conn = get_db()
359
  cursor = conn.cursor()
@@ -364,6 +292,8 @@ def upvote_model(model, uname):
364
  with scheduler.lock:
365
  conn.commit()
366
  cursor.close()
 
 
367
  def log_text(text):
368
  conn = get_db()
369
  cursor = conn.cursor()
@@ -371,6 +301,8 @@ def log_text(text):
371
  with scheduler.lock:
372
  conn.commit()
373
  cursor.close()
 
 
374
  def downvote_model(model, uname):
375
  conn = get_db()
376
  cursor = conn.cursor()
@@ -471,51 +403,7 @@ with gr.Blocks() as leaderboard:
471
  reloadbtn.click(get_leaderboard, inputs=[reveal_prelim], outputs=[df])
472
  # gr.Markdown("DISCLAIMER: The licenses listed may not be accurate or up to date, you are responsible for checking the licenses before using the models. Also note that some models may have additional usage restrictions.")
473
 
474
- # with gr.Blocks() as vote:
475
- # useridstate = gr.State()
476
- # gr.Markdown(INSTR)
477
- # # gr.LoginButton()
478
- # with gr.Row():
479
- # gr.HTML('<div align="left"><h3>Model A</h3></div>')
480
- # gr.HTML('<div align="right"><h3>Model B</h3></div>')
481
- # model1 = gr.Textbox(interactive=False, visible=False, lines=1, max_lines=1)
482
- # model2 = gr.Textbox(interactive=False, visible=False, lines=1, max_lines=1)
483
- # # with gr.Group():
484
- # # with gr.Row():
485
- # # prevmodel1 = gr.Textbox(interactive=False, show_label=False, container=False, value="Vote to reveal model A")
486
- # # prevmodel2 = gr.Textbox(interactive=False, show_label=False, container=False, value="Vote to reveal model B", text_align="right")
487
- # # with gr.Row():
488
- # # aud1 = gr.Audio(interactive=False, show_label=False, show_download_button=False, show_share_button=False, waveform_options={'waveform_progress_color': '#3C82F6'})
489
- # # aud2 = gr.Audio(interactive=False, show_label=False, show_download_button=False, show_share_button=False, waveform_options={'waveform_progress_color': '#3C82F6'})
490
- # with gr.Group():
491
- # with gr.Row():
492
- # with gr.Column():
493
- # with gr.Group():
494
- # prevmodel1 = gr.Textbox(interactive=False, show_label=False, container=False, value="Vote to reveal model A", lines=1, max_lines=1)
495
- # aud1 = gr.Audio(interactive=False, show_label=False, show_download_button=False, show_share_button=False, waveform_options={'waveform_progress_color': '#3C82F6'})
496
- # with gr.Column():
497
- # with gr.Group():
498
- # prevmodel2 = gr.Textbox(interactive=False, show_label=False, container=False, value="Vote to reveal model B", text_align="right", lines=1, max_lines=1)
499
- # aud2 = gr.Audio(interactive=False, show_label=False, show_download_button=False, show_share_button=False, waveform_options={'waveform_progress_color': '#3C82F6'})
500
-
501
-
502
- # with gr.Row():
503
- # abetter = gr.Button("A is Better", variant='primary', scale=4)
504
- # # skipbtn = gr.Button("Skip", scale=1)
505
- # bbetter = gr.Button("B is Better", variant='primary', scale=4)
506
- # with gr.Row():
507
- # bothbad = gr.Button("Both are Bad", scale=2)
508
- # skipbtn = gr.Button("Skip", scale=1)
509
- # bothgood = gr.Button("Both are Good", scale=2)
510
- # outputs = [aud1, aud2, model1, model2, useridstate, prevmodel1, prevmodel2]
511
- # abetter.click(a_is_better, outputs=outputs, inputs=[model1, model2, useridstate])
512
- # bbetter.click(b_is_better, outputs=outputs, inputs=[model1, model2, useridstate])
513
- # skipbtn.click(b_is_better, outputs=outputs, inputs=[model1, model2, useridstate])
514
-
515
- # bothbad.click(both_bad, outputs=outputs, inputs=[model1, model2, useridstate])
516
- # bothgood.click(both_good, outputs=outputs, inputs=[model1, model2, useridstate])
517
-
518
- # vote.load(reload, outputs=[aud1, aud2, model1, model2])
519
  def doloudnorm(path):
520
  data, rate = sf.read(path)
521
  meter = pyln.Meter(rate)
 
134
  allow_patterns=DB_NAME,
135
  )
136
 
 
 
137
 
138
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  ## 🏆 Leaderboard
140
  Vote to help the community determine the best text-to-speech (TTS) models.
141
  The leaderboard displays models in descending order of how natural they sound (based on votes cast by the community).
 
144
 
145
 
146
 
 
 
 
 
 
 
147
  def del_db(txt):
148
  if not txt.lower() == 'delete db':
149
  raise gr.Error('You did not enter "delete db"')
 
231
  'speecht5': 'https://github.com/microsoft/SpeechT5',
232
  'metavoice': 'https://github.com/metavoiceio/metavoice-src',
233
  }
234
+
 
 
 
 
 
 
 
 
 
 
235
  def model_license(name):
236
  print(name)
237
  for k, v in AVAILABLE_MODELS.items():
 
240
  return model_licenses[v]
241
  print('---')
242
  return 'Unknown'
243
+
244
+
245
  def get_leaderboard(reveal_prelim = False):
246
  conn = get_db()
247
  cursor = conn.cursor()
 
275
  # df = df[['order', 'name', 'score', 'license', 'votes']]
276
  df = df[['order', 'name', 'score', 'votes']]
277
  return df
278
+
279
+
280
  def mkuuid(uid):
281
  if not uid:
282
  uid = uuid.uuid4()
283
  return uid
284
+
285
  def upvote_model(model, uname):
286
  conn = get_db()
287
  cursor = conn.cursor()
 
292
  with scheduler.lock:
293
  conn.commit()
294
  cursor.close()
295
+
296
+
297
  def log_text(text):
298
  conn = get_db()
299
  cursor = conn.cursor()
 
301
  with scheduler.lock:
302
  conn.commit()
303
  cursor.close()
304
+
305
+
306
  def downvote_model(model, uname):
307
  conn = get_db()
308
  cursor = conn.cursor()
 
403
  reloadbtn.click(get_leaderboard, inputs=[reveal_prelim], outputs=[df])
404
  # gr.Markdown("DISCLAIMER: The licenses listed may not be accurate or up to date, you are responsible for checking the licenses before using the models. Also note that some models may have additional usage restrictions.")
405
 
406
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  def doloudnorm(path):
408
  data, rate = sf.read(path)
409
  meter = pyln.Meter(rate)