Pendrokar committed on
Commit
a886688
1 Parent(s): 380840b

added TTS: E2/F5 TTS; F5 model; default voice sample switched to XTTS's

Browse files
Files changed (2) hide show
  1. README.md +1 -0
  2. app.py +28 -8
README.md CHANGED
@@ -20,5 +20,6 @@ models:
20
  - parler-tts/parler-tts-mini-v1
21
  - Pendrokar/xvapitch_expresso
22
  - WhisperSpeech/WhisperSpeech
 
23
  sdk_version: 5.0.2
24
  ---
 
20
  - parler-tts/parler-tts-mini-v1
21
  - Pendrokar/xvapitch_expresso
22
  - WhisperSpeech/WhisperSpeech
23
+ - SWivid/F5-TTS
24
  sdk_version: 5.0.2
25
  ---
app.py CHANGED
@@ -5,7 +5,7 @@ from datasets import load_dataset
5
  import threading, time, uuid, sqlite3, shutil, os, random, asyncio, threading
6
  from pathlib import Path
7
  from huggingface_hub import CommitScheduler, delete_file, hf_hub_download
8
- from gradio_client import Client, file
9
  import pyloudnorm as pyln
10
  import soundfile as sf
11
  import librosa
@@ -84,6 +84,10 @@ AVAILABLE_MODELS = {
84
  'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
85
  'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
86
 
 
 
 
 
87
  # # Parler
88
  'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
89
  'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
@@ -199,6 +203,7 @@ HF_SPACES = {
199
  'is_proprietary': True,
200
  },
201
 
 
202
  'fishaudio/fish-speech-1': {
203
  'name': 'Fish Speech',
204
  'function': '/inference_wrapper',
@@ -206,6 +211,14 @@ HF_SPACES = {
206
  'return_audio_index': 1,
207
  },
208
 
 
 
 
 
 
 
 
 
209
  # TTS w issues
210
  # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
211
  # 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
@@ -217,15 +230,15 @@ HF_SPACES = {
217
  # 'fishaudio/fish-speech-1': '/inference_wrapper#0', heavy hallucinations
218
  }
219
 
220
- # for zero-shot TTS - voice sample of Scarlett Johansson
221
- DEFAULT_VOICE_SAMPLE_STR = 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav'
222
- DEFAULT_VOICE_SAMPLE = file(DEFAULT_VOICE_SAMPLE_STR)
223
- DEFAULT_VOICE_TRANSCRIPT = "In the first half of the 20th century, science fiction familiarized the world with the concept of artificially intelligent robots. It began with the “heartless” Tin man from the Wizard of Oz and continued with the humanoid robot that impersonated Maria in Metropolis. By the 1950s, we had a generation of scientists, mathematicians, and philosophers with the concept of artificial intelligence (or AI) culturally assimilated in their minds."
224
 
225
  OVERRIDE_INPUTS = {
226
  'coqui/xtts': {
227
  1: 'en',
228
- 2: 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample
229
  3: None, # mic voice sample
230
  4: False, #use_mic
231
  5: False, #cleanup_reference
@@ -260,7 +273,7 @@ OVERRIDE_INPUTS = {
260
  1: 'LikeManyWaters', # voice
261
  },
262
  'LeeSangHoon/HierSpeech_TTS': {
263
- 1: file('https://huggingface.co/spaces/LeeSangHoon/HierSpeech_TTS/resolve/main/example/female.wav'), # voice sample
264
  2: 0.333,
265
  3: 0.333,
266
  4: 1,
@@ -300,7 +313,7 @@ OVERRIDE_INPUTS = {
300
 
301
  'fishaudio/fish-speech-1': {
302
  1: True, # enable_reference_audio
303
- 2: file('https://huggingface.co/spaces/fishaudio/fish-speech-1/resolve/main/examples/English.wav'), # reference_audio
304
  3: 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
305
  4: 0, # max_new_tokens
306
  5: 200, # chunk_length
@@ -310,6 +323,13 @@ OVERRIDE_INPUTS = {
310
  9: 1, # batch_infer_num
311
  10: False, # if_load_asr_model
312
  },
 
 
 
 
 
 
 
313
  }
314
 
315
  hf_clients = {}
 
5
  import threading, time, uuid, sqlite3, shutil, os, random, asyncio, threading
6
  from pathlib import Path
7
  from huggingface_hub import CommitScheduler, delete_file, hf_hub_download
8
+ from gradio_client import Client, handle_file
9
  import pyloudnorm as pyln
10
  import soundfile as sf
11
  import librosa
 
84
  'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
85
  'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
86
 
87
+ # E2 & F5 TTS
88
+ # F5 model
89
+ 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
90
+
91
  # # Parler
92
  'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
93
  'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
 
203
  'is_proprietary': True,
204
  },
205
 
206
+ # Fish Speech
207
  'fishaudio/fish-speech-1': {
208
  'name': 'Fish Speech',
209
  'function': '/inference_wrapper',
 
211
  'return_audio_index': 1,
212
  },
213
 
214
+ # E2/F5 TTS
215
+ 'mrfakename/E2-F5-TTS': {
216
+ 'name': 'F5 of E2 TTS',
217
+ 'function': '/infer',
218
+ 'text_param_index': 2,
219
+ 'return_audio_index': 0,
220
+ },
221
+
222
  # TTS w issues
223
  # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
224
  # 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
 
230
  # 'fishaudio/fish-speech-1': '/inference_wrapper#0', heavy hallucinations
231
  }
232
 
233
+ # for zero-shot TTS - voice sample used by XTTS (11 seconds)
234
+ DEFAULT_VOICE_SAMPLE_STR = 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'
235
+ DEFAULT_VOICE_SAMPLE = handle_file(DEFAULT_VOICE_SAMPLE_STR)
236
+ DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
237
 
238
  OVERRIDE_INPUTS = {
239
  'coqui/xtts': {
240
  1: 'en',
241
+ 2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
242
  3: None, # mic voice sample
243
  4: False, #use_mic
244
  5: False, #cleanup_reference
 
273
  1: 'LikeManyWaters', # voice
274
  },
275
  'LeeSangHoon/HierSpeech_TTS': {
276
+ 1: handle_file('https://huggingface.co/spaces/LeeSangHoon/HierSpeech_TTS/resolve/main/example/female.wav'), # voice sample
277
  2: 0.333,
278
  3: 0.333,
279
  4: 1,
 
313
 
314
  'fishaudio/fish-speech-1': {
315
  1: True, # enable_reference_audio
316
+ 2: handle_file('https://huggingface.co/spaces/fishaudio/fish-speech-1/resolve/main/examples/English.wav'), # reference_audio
317
  3: 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
318
  4: 0, # max_new_tokens
319
  5: 200, # chunk_length
 
323
  9: 1, # batch_infer_num
324
  10: False, # if_load_asr_model
325
  },
326
+
327
+ 'mrfakename/E2-F5-TTS': {
328
+ 0: DEFAULT_VOICE_SAMPLE, # voice sample
329
+ 1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
330
+ 3: "F5-TTS", # model
331
+ 4: False, # cleanup silence
332
+ },
333
  }
334
 
335
  hf_clients = {}