Files changed (1) hide show
  1. app.py +126 -4
app.py CHANGED
@@ -36,7 +36,54 @@ AVAILABLE_MODELS = {
36
  'ElevenLabs': 'eleven',
37
  'OpenVoice': 'openvoice',
38
  'Pheme': 'pheme',
39
- 'MetaVoice': 'metavoice'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
 
42
  SPACE_ID = os.getenv('SPACE_ID')
@@ -118,6 +165,7 @@ if not os.path.isfile(DB_PATH):
118
  # Create DB table (if doesn't exist)
119
  create_db_if_missing()
120
 
 
121
  # Sync local DB with remote repo every 5 minute (only if a change is detected)
122
  scheduler = CommitScheduler(
123
  repo_id=DB_DATASET_ID,
@@ -133,7 +181,7 @@ scheduler = CommitScheduler(
133
  ####################################
134
  # Router API
135
  ####################################
136
- router = Client("TTS-AGI/tts-router", hf_token=os.getenv('HF_TOKEN'))
137
  ####################################
138
  # Gradio app
139
  ####################################
@@ -291,6 +339,9 @@ model_licenses = {
291
  'metavoice': 'Apache 2.0',
292
  'elevenlabs': 'Proprietary',
293
  'whisperspeech': 'MIT',
 
 
 
294
  }
295
  model_links = {
296
  'styletts2': 'https://github.com/yl4579/StyleTTS2',
@@ -561,7 +612,44 @@ def synthandreturn(text):
561
  def predict_and_update_result(text, model, result_storage):
562
  try:
563
  if model in AVAILABLE_MODELS:
564
- result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  else:
566
  result = router.predict(text, model.lower(), api_name="/synthesize")
567
  except:
@@ -593,6 +681,40 @@ def synthandreturn(text):
593
  # doloudnorm(result)
594
  # except:
595
  # pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
  results = {}
597
  thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1, results))
598
  thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2, results))
@@ -709,4 +831,4 @@ with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}
709
  gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
710
 
711
 
712
- demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
 
36
  'ElevenLabs': 'eleven',
37
  'OpenVoice': 'openvoice',
38
  'Pheme': 'pheme',
39
+ 'MetaVoice': 'metavoice',
40
+
41
+ # '<Space>': <function>#<return-index-of-audio-param>
42
+ # 'coqui/xtts': '1#1',
43
+ # 'collabora/WhisperSpeech': '/whisper_speech_demo#0',
44
+ # 'myshell-ai/OpenVoice': '1#1',
45
+ # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
46
+ # 'mrfakename/MetaVoice-1B-v0.1': '/tts#0',
47
+
48
+ # xVASynth (CPU)
49
+ 'Pendrokar/xVASynth': '/predict#0',
50
+
51
+ # MeloTTS
52
+ # 'mrfakename/MeloTTS': '0#0', #API disabled
53
+
54
+ # CoquiTTS (CPU)
55
+ 'coqui/CoquiTTS': '0#0',
56
+
57
+ # 'pytorch/Tacotron2': '0#0', #old gradio
58
+ }
59
+
60
# Hand-tuned positional inputs for public HF Spaces, applied on top of each
# endpoint's example inputs by _override_params (positional index -> value).
# Index 0 is always the input text and is forced separately after overriding,
# so overrides start at index 1.
OVERRIDE_INPUTS = {
    'coqui/xtts': {
        1: 'en',  # language code
        2: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johansson
        3: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johansson
        4: False, # use_mic
        5: False, # cleanup_reference
        6: False, # auto_detect
    },
    'collabora/WhisperSpeech': {
        1: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johansson
        2: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johansson
        3: 14.0, # Tempo - Gradio Slider issue: takes min. rather than value
    },
    'myshell-ai/OpenVoice': {
        1: 'default', # style
        2: 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', # voice sample - Scarlett Johansson
    },
    'PolyAI/pheme': {
        1: 'YOU1000000044_S0000798', # voice
        2: 210, # NOTE(review): meaning not shown here -- confirm against the Space's API signature
        3: 0.7, # Tempo - Gradio Slider issue: takes min. rather than value
    },
    'Pendrokar/xVASynth': {
        1: 'ccby_nvidia_hifi_92_F', # fine-tuned voice model name
        3: 1.0, # pacing/duration - Gradio Slider issue: takes min. rather than value
    },
}
88
 
89
  SPACE_ID = os.getenv('SPACE_ID')
 
165
  # Create DB table (if doesn't exist)
166
  create_db_if_missing()
167
 
168
+ hf_token = os.getenv('HF_TOKEN')
169
  # Sync local DB with remote repo every 5 minute (only if a change is detected)
170
  scheduler = CommitScheduler(
171
  repo_id=DB_DATASET_ID,
 
181
  ####################################
182
  # Router API
183
  ####################################
184
+ router = Client("TTS-AGI/tts-router", hf_token=hf_token)
185
  ####################################
186
  # Gradio app
187
  ####################################
 
339
  'metavoice': 'Apache 2.0',
340
  'elevenlabs': 'Proprietary',
341
  'whisperspeech': 'MIT',
342
+
343
+ 'Pendrokar/xVASynth': 'GPT3',
344
+ 'Pendrokar/xVASynthStreaming': 'GPT3',
345
  }
346
  model_links = {
347
  'styletts2': 'https://github.com/yl4579/StyleTTS2',
 
612
  def predict_and_update_result(text, model, result_storage):
613
  try:
614
  if model in AVAILABLE_MODELS:
615
+ if '/' in model:
616
+ # Use public HF Space
617
+ mdl_space = Client(model, hf_token=hf_token)
618
+ # assume the index is one of the first 9 return params
619
+ return_audio_index = int(AVAILABLE_MODELS[model][-1])
620
+ endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
621
+
622
+ api_name = None
623
+ fn_index = None
624
+ # has named endpoint
625
+ if '/' == AVAILABLE_MODELS[model][:1]:
626
+ # assume the index is one of the first 9 params
627
+ api_name = AVAILABLE_MODELS[model][:-2]
628
+
629
+ space_inputs = _get_param_examples(
630
+ endpoints['named_endpoints'][api_name]['parameters']
631
+ )
632
+ # has unnamed endpoint
633
+ else:
634
+ # endpoint index is the first character
635
+ fn_index = int(AVAILABLE_MODELS[model][0])
636
+
637
+ space_inputs = _get_param_examples(
638
+ endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
639
+ )
640
+
641
+ space_inputs = _override_params(space_inputs, model)
642
+
643
+ # force text
644
+ space_inputs[0] = text
645
+
646
+ results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
647
+
648
+ # return path to audio
649
+ result = results[return_audio_index] if (not isinstance(results, str)) else results
650
+ else:
651
+ # Use the private HF Space
652
+ result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
653
  else:
654
  result = router.predict(text, model.lower(), api_name="/synthesize")
655
  except:
 
681
  # doloudnorm(result)
682
  # except:
683
  # pass
684
+
685
+ def _get_param_examples(parameters):
686
+ example_inputs = []
687
+ for param_info in parameters:
688
+ if (
689
+ param_info['component'] == 'Radio'
690
+ or param_info['component'] == 'Dropdown'
691
+ or param_info['component'] == 'Audio'
692
+ or param_info['python_type']['type'] == 'str'
693
+ ):
694
+ example_inputs.append(str(param_info['example_input']))
695
+ continue
696
+ if param_info['python_type']['type'] == 'int':
697
+ example_inputs.append(int(param_info['example_input']))
698
+ continue
699
+ if param_info['python_type']['type'] == 'float':
700
+ example_inputs.append(float(param_info['example_input']))
701
+ continue
702
+ if param_info['python_type']['type'] == 'bool':
703
+ example_inputs.append(bool(param_info['example_input']))
704
+ continue
705
+
706
+ return example_inputs
707
+
708
+ def _override_params(inputs, modelname):
709
+ try:
710
+ for key,value in OVERRIDE_INPUTS[modelname].items():
711
+ inputs[key] = value
712
+ print(f"Default inputs overridden for {modelname}")
713
+ except:
714
+ pass
715
+
716
+ return inputs
717
+
718
  results = {}
719
  thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1, results))
720
  thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2, results))
 
831
  gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
832
 
833
 
834
+ demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)