Pendrokar committed on
Commit
c93450a
Β·
1 Parent(s): d0fb122

Contenders tab: query relevant TTS models

Browse files
Files changed (1) hide show
  1. app.py +30 -8
app.py CHANGED
@@ -117,6 +117,7 @@ HF_SPACES = {
117
  'function': '1',
118
  'text_param_index': 0,
119
  'return_audio_index': 1,
 
120
  },
121
  # WhisperSpeech
122
  'collabora/WhisperSpeech': {
@@ -124,6 +125,7 @@ HF_SPACES = {
124
  'function': '/whisper_speech_demo',
125
  'text_param_index': 0,
126
  'return_audio_index': 0,
 
127
  },
128
  # OpenVoice (MyShell.ai)
129
  'myshell-ai/OpenVoice': {
@@ -131,6 +133,7 @@ HF_SPACES = {
131
  'function': '1',
132
  'text_param_index': 0,
133
  'return_audio_index': 1,
 
134
  },
135
  # OpenVoice v2 (MyShell.ai)
136
  'myshell-ai/OpenVoiceV2': {
@@ -138,13 +141,15 @@ HF_SPACES = {
138
  'function': '1',
139
  'text_param_index': 0,
140
  'return_audio_index': 1,
 
141
  },
142
  # MetaVoice
143
  'mrfakename/MetaVoice-1B-v0.1': {
144
- 'name':'MetaVoice',
145
  'function': '/tts',
146
  'text_param_index': 0,
147
  'return_audio_index': 0,
 
148
  },
149
  # xVASynth (CPU)
150
  'Pendrokar/xVASynth-TTS': {
@@ -152,6 +157,7 @@ HF_SPACES = {
152
  'function': '/predict',
153
  'text_param_index': 0,
154
  'return_audio_index': 0,
 
155
  },
156
  # CoquiTTS (CPU)
157
  'coqui/CoquiTTS': {
@@ -159,6 +165,7 @@ HF_SPACES = {
159
  'function': '0',
160
  'text_param_index': 0,
161
  'return_audio_index': 0,
 
162
  },
163
  # HierSpeech_TTS
164
  'LeeSangHoon/HierSpeech_TTS': {
@@ -166,6 +173,7 @@ HF_SPACES = {
166
  'function': '/predict',
167
  'text_param_index': 0,
168
  'return_audio_index': 0,
 
169
  },
170
  # MeloTTS (MyShell.ai)
171
  'mrfakename/MeloTTS': {
@@ -173,6 +181,7 @@ HF_SPACES = {
173
  'function': '/synthesize',
174
  'text_param_index': 0,
175
  'return_audio_index': 0,
 
176
  },
177
 
178
  # Parler
@@ -182,6 +191,7 @@ HF_SPACES = {
182
  'text_param_index': 0,
183
  'return_audio_index': 0,
184
  'is_zero_gpu_space': True,
 
185
  },
186
  # Parler Mini
187
  # 'parler-tts/parler_tts': {
@@ -190,6 +200,7 @@ HF_SPACES = {
190
  # 'text_param_index': 0,
191
  # 'return_audio_index': 0,
192
  # 'is_zero_gpu_space': True,
 
193
  # },
194
  # Parler Mini which uses the Expresso dataset
195
  'parler-tts/parler-tts-expresso': {
@@ -198,6 +209,7 @@ HF_SPACES = {
198
  'text_param_index': 0,
199
  'return_audio_index': 0,
200
  'is_zero_gpu_space': True,
 
201
  },
202
 
203
  # Microsoft Edge TTS
@@ -207,6 +219,7 @@ HF_SPACES = {
207
  'text_param_index': 0,
208
  'return_audio_index': 0,
209
  'is_proprietary': True,
 
210
  },
211
 
212
  # Fish Speech
@@ -215,6 +228,7 @@ HF_SPACES = {
215
  'function': '/inference_wrapper',
216
  'text_param_index': 0,
217
  'return_audio_index': 1,
 
218
  },
219
 
220
  # E2/F5 TTS
@@ -224,6 +238,7 @@ HF_SPACES = {
224
  'text_param_index': 2,
225
  'return_audio_index': 0,
226
  'is_zero_gpu_space': True,
 
227
  },
228
 
229
  # TTS w issues
@@ -543,6 +558,7 @@ Generated audio clips cannot be redistributed and may be used for personal, non-
543
 
544
  Random sentences are sourced from a filtered subset of the [Harvard Sentences](https://www.cs.columbia.edu/~hgs/audio/harvard.html).
545
  """.strip()
 
546
  LDESC = f"""
547
  ## πŸ† Leaderboard
548
 
@@ -552,19 +568,25 @@ The leaderboard displays models in descending order of how natural they sound (b
552
 
553
  Important: In order to help keep results fair, the leaderboard hides results by default until the number of votes passes a threshold. Tick the `Reveal preliminary results` to show models without sufficient votes. Please note that preliminary results may be inaccurate. [This dataset is public](https://huggingface.co/datasets/{DB_DATASET_ID}) and only saves the hardcoded sentences while keeping the voters anonymous.
554
  """.strip()
 
555
  TTS_INFO = f"""
556
  ## πŸ—£ Contenders
557
 
558
  ### Open Source TTS capabilities table
559
 
560
- See the dataset itself for the legend and more in depth information for each model. Not all the models listed have a HF Space to add to this TTS Arena.
561
  """.strip()
562
- TTS_IFRAME = """
 
 
 
 
 
563
  <iframe
564
- src="https://huggingface.co/datasets/Pendrokar/open_tts_tracker/embed/viewer/default/train"
565
- frameborder="0"
566
- width="100%"
567
- height="560px"
568
  ></iframe>
569
  """.strip()
570
 
@@ -1576,7 +1598,7 @@ with gr.Blocks() as about:
1576
  gr.Markdown(ABOUT)
1577
  with gr.Blocks() as tts_info:
1578
  gr.Markdown(TTS_INFO)
1579
- gr.HTML(TTS_IFRAME)
1580
  # with gr.Blocks() as admin:
1581
  # rdb = gr.Button("Reload Audio Dataset")
1582
  # # rdb.click(reload_audio_dataset, outputs=rdb)
 
117
  'function': '1',
118
  'text_param_index': 0,
119
  'return_audio_index': 1,
120
+ 'series': 'XTTS',
121
  },
122
  # WhisperSpeech
123
  'collabora/WhisperSpeech': {
 
125
  'function': '/whisper_speech_demo',
126
  'text_param_index': 0,
127
  'return_audio_index': 0,
128
+ 'series': 'WhisperSpeech',
129
  },
130
  # OpenVoice (MyShell.ai)
131
  'myshell-ai/OpenVoice': {
 
133
  'function': '1',
134
  'text_param_index': 0,
135
  'return_audio_index': 1,
136
+ 'series': 'OpenVoice',
137
  },
138
  # OpenVoice v2 (MyShell.ai)
139
  'myshell-ai/OpenVoiceV2': {
 
141
  'function': '1',
142
  'text_param_index': 0,
143
  'return_audio_index': 1,
144
+ 'series': 'OpenVoice',
145
  },
146
  # MetaVoice
147
  'mrfakename/MetaVoice-1B-v0.1': {
148
+ 'name':'MetaVoice-1B',
149
  'function': '/tts',
150
  'text_param_index': 0,
151
  'return_audio_index': 0,
152
+ 'series': 'MetaVoice-1B',
153
  },
154
  # xVASynth (CPU)
155
  'Pendrokar/xVASynth-TTS': {
 
157
  'function': '/predict',
158
  'text_param_index': 0,
159
  'return_audio_index': 0,
160
+ 'series': 'xVASynth',
161
  },
162
  # CoquiTTS (CPU)
163
  'coqui/CoquiTTS': {
 
165
  'function': '0',
166
  'text_param_index': 0,
167
  'return_audio_index': 0,
168
+ 'series': 'CoquiTTS',
169
  },
170
  # HierSpeech_TTS
171
  'LeeSangHoon/HierSpeech_TTS': {
 
173
  'function': '/predict',
174
  'text_param_index': 0,
175
  'return_audio_index': 0,
176
+ 'series': 'HierSpeech++',
177
  },
178
  # MeloTTS (MyShell.ai)
179
  'mrfakename/MeloTTS': {
 
181
  'function': '/synthesize',
182
  'text_param_index': 0,
183
  'return_audio_index': 0,
184
+ 'series': 'MeloTTS',
185
  },
186
 
187
  # Parler
 
191
  'text_param_index': 0,
192
  'return_audio_index': 0,
193
  'is_zero_gpu_space': True,
194
+ 'series': 'Parler',
195
  },
196
  # Parler Mini
197
  # 'parler-tts/parler_tts': {
 
200
  # 'text_param_index': 0,
201
  # 'return_audio_index': 0,
202
  # 'is_zero_gpu_space': True,
203
+ # 'series': 'Parler',
204
  # },
205
  # Parler Mini which uses the Expresso dataset
206
  'parler-tts/parler-tts-expresso': {
 
209
  'text_param_index': 0,
210
  'return_audio_index': 0,
211
  'is_zero_gpu_space': True,
212
+ 'series': 'Parler',
213
  },
214
 
215
  # Microsoft Edge TTS
 
219
  'text_param_index': 0,
220
  'return_audio_index': 0,
221
  'is_proprietary': True,
222
+ 'series': 'Edge TTS',
223
  },
224
 
225
  # Fish Speech
 
228
  'function': '/inference_wrapper',
229
  'text_param_index': 0,
230
  'return_audio_index': 1,
231
+ 'series': 'Fish Speech',
232
  },
233
 
234
  # E2/F5 TTS
 
238
  'text_param_index': 2,
239
  'return_audio_index': 0,
240
  'is_zero_gpu_space': True,
241
+ 'series': 'E2/F5 TTS',
242
  },
243
 
244
  # TTS w issues
 
558
 
559
  Random sentences are sourced from a filtered subset of the [Harvard Sentences](https://www.cs.columbia.edu/~hgs/audio/harvard.html).
560
  """.strip()
561
+
562
  LDESC = f"""
563
  ## πŸ† Leaderboard
564
 
 
568
 
569
  Important: In order to help keep results fair, the leaderboard hides results by default until the number of votes passes a threshold. Tick the `Reveal preliminary results` to show models without sufficient votes. Please note that preliminary results may be inaccurate. [This dataset is public](https://huggingface.co/datasets/{DB_DATASET_ID}) and only saves the hardcoded sentences while keeping the voters anonymous.
570
  """.strip()
571
+
572
  TTS_INFO = f"""
573
  ## πŸ—£ Contenders
574
 
575
  ### Open Source TTS capabilities table
576
 
577
+ See [the below dataset itself](https://huggingface.co/datasets/Pendrokar/open_tts_tracker) for the legend and more in depth information for each model.
578
  """.strip()
579
+
580
+ model_series = []
581
+ for model in HF_SPACES.values():
582
+ model_series.append('%27'+ model['series'].replace('+', '%2B') +'%27')
583
+ TTS_DATASET_IFRAME_ORDER = '%2C+'.join(model_series)
584
+ TTS_DATASET_IFRAME = f"""
585
  <iframe
586
+ src="https://huggingface.co/datasets/Pendrokar/open_tts_tracker/embed/viewer/default/train?sql_console=true&sql=--+The+SQL+console+is+powered+by+DuckDB+WASM+and+runs+entirely+in+the+browser.%0A--+Get+started+by+typing+a+query+or+selecting+a+view+from+the+options+below.%0ASELECT+*%2C+%22Name%22+IN+%28{TTS_DATASET_IFRAME_ORDER}%29+AS+%22In+arena%22+FROM+train+WHERE+%22Insta-clone+%F0%9F%91%A5%22+IS+NOT+NULL+ORDER+BY+%22In+arena%22+DESC+LIMIT+50%3B&views%5B%5D=train"
587
+ frameborder="0"
588
+ width="100%"
589
+ height="650px"
590
  ></iframe>
591
  """.strip()
592
 
 
1598
  gr.Markdown(ABOUT)
1599
  with gr.Blocks() as tts_info:
1600
  gr.Markdown(TTS_INFO)
1601
+ gr.HTML(TTS_DATASET_IFRAME)
1602
  # with gr.Blocks() as admin:
1603
  # rdb = gr.Button("Reload Audio Dataset")
1604
  # # rdb.click(reload_audio_dataset, outputs=rdb)