Spaces:
Running
Running
the test TTS python scripts
Browse files
- test_tts_e2_f5_e2.py +14 -0
- test_tts_e2_f5_f5.py +14 -0
- test_tts_edge.py +13 -0
- test_tts_fish.py +21 -0
- test_tts_melo.py +13 -0
- test_tts_metavoice.py +15 -0
- test_tts_parler.py +12 -0
- test_tts_parler_ex.py +12 -0
- test_tts_tortoise.py +13 -0
- test_tts_voicecraft.py +29 -0
- test_tts_whisper.py +34 -0
- test_tts_xtts.py +17 -0
- test_tts_xva.py +23 -0
test_tts_e2_f5_e2.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
|
4 |
+
# Smoke test: call the "/infer" endpoint of the "mrfakename/E2-F5-TTS" Space
# with exp_name="E2-TTS". The token comes from the HF_TOKEN environment
# variable (None when it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("mrfakename/E2-F5-TTS", hf_token=hf_token)

# Fetch the endpoint description as a dict and print it for inspection.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
print(endpoints)

# Keyword arguments for the call, gathered in one place.
infer_kwargs = {
    # Reference clip, wrapped with handle_file.
    "ref_audio_orig": handle_file(
        "https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav"
    ),
    # NOTE(review): presumably the transcript of the reference clip - confirm.
    "ref_text": "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.",
    "gen_text": "Please surprise me and speak in whatever voice you enjoy.",
    "exp_name": "E2-TTS",
    "remove_silence": False,
}
result = client.predict(api_name="/infer", **infer_kwargs)
|
test_tts_e2_f5_f5.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
|
4 |
+
# Smoke test: call the "/infer" endpoint of the "mrfakename/E2-F5-TTS" Space
# with exp_name="F5-TTS". The token comes from the HF_TOKEN environment
# variable (None when it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("mrfakename/E2-F5-TTS", hf_token=hf_token)

# Fetch the endpoint description as a dict and print it for inspection.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
print(endpoints)

# Keyword arguments for the call, gathered in one place.
infer_kwargs = {
    # Reference clip, wrapped with handle_file.
    "ref_audio_orig": handle_file(
        "https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav"
    ),
    # NOTE(review): presumably the transcript of the reference clip - confirm.
    "ref_text": "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.",
    "gen_text": "Please surprise me and speak in whatever voice you enjoy.",
    "exp_name": "F5-TTS",
    "remove_silence": False,
}
result = client.predict(api_name="/infer", **infer_kwargs)
|
test_tts_edge.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, file
|
3 |
+
|
4 |
+
# Smoke test: call the "/predict" endpoint of the
# "innoai/Edge-TTS-Text-to-Speech" Space. The token comes from the HF_TOKEN
# environment variable (None when it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("innoai/Edge-TTS-Text-to-Speech", hf_token=hf_token)

# Endpoint description as a dict; printing is currently disabled.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
# print(endpoints)

# Positional inputs, in the order the endpoint receives them.
predict_args = (
    "Please surprise me and speak in whatever voice you enjoy.",
    "en-US-EmmaMultilingualNeural - en-US (Female)",
    # NOTE(review): meaning of the two zeros is not visible here
    # (presumably rate and pitch adjustments) - confirm against `endpoints`.
    0,
    0,
)
result = client.predict(*predict_args, api_name="/predict")
|
test_tts_fish.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
|
4 |
+
# Smoke test: call the "/inference_wrapper" endpoint of the
# "fishaudio/fish-speech-1" Space with a reference clip and print the second
# element of the result. The token comes from the HF_TOKEN environment
# variable (None when it is unset).
client = Client("fishaudio/fish-speech-1", hf_token=os.getenv("HF_TOKEN"))

# To inspect the API:
# printz = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(printz)

# Text passed as `reference_text`. The curly quotes around "heartless" are
# written as explicit escapes: they had been mis-decoded into stray
# characters, and escapes keep the literal safe from re-encoding damage.
REFERENCE_TEXT = (
    "In the first half of the 20th century, science fiction familiarized "
    "the world with the concept of artificially intelligent robots. "
    "It began with the \u201cheartless\u201d Tin man from the Wizard of Oz "
    "and continued with the humanoid robot that impersonated Maria in "
    "Metropolis. By the 1950s, we had a generation of scientists, "
    "mathematicians, and philosophers with the concept of artificial "
    "intelligence (or AI) culturally assimilated in their minds."
)

result = client.predict(
    text="Please surprise me and speak in whatever voice you enjoy.",
    enable_reference_audio=True,
    reference_audio=handle_file(
        "https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav"
    ),
    reference_text=REFERENCE_TEXT,
    max_new_tokens=1024,
    chunk_length=200,
    top_p=0.7,
    repetition_penalty=1.2,
    temperature=0.7,
    batch_infer_num=1,
    if_load_asr_model=False,
    api_name="/inference_wrapper",
)
# NOTE(review): index 1 is presumably the generated audio - confirm against
# the endpoint description.
print(result[1])
|
test_tts_melo.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client
|
3 |
+
|
4 |
+
# Smoke test: call the "/synthesize" endpoint of the "mrfakename/MeloTTS"
# Space. The token comes from the HF_TOKEN environment variable (None when
# it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("mrfakename/MeloTTS", hf_token=hf_token)

# Endpoint description as a dict; printing is currently disabled.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
# print(endpoints)

# Positional inputs, in the order the endpoint receives them.
synthesize_args = (
    # 'Text to speak' Textbox (str).
    "Please surprise me and speak in whatever voice you enjoy.",
    # 'Speaker' Dropdown: 'EN-US', 'EN-BR', 'EN_INDIA', 'EN-AU' or 'EN-Default'.
    "EN-US",
    # float, numeric value between 0.1 and 10.0.
    1.0,
    # 'Language' Radio: 'EN', 'ES', 'FR', 'ZH', 'JP' or 'KR'.
    "EN",
)
result = client.predict(*synthesize_args, api_name="/synthesize")
|
test_tts_metavoice.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, file
|
3 |
+
|
4 |
+
# Smoke test: call the "/tts" endpoint of the "mrfakename/MetaVoice-1B-v0.1"
# Space using a preset voice. The token comes from the HF_TOKEN environment
# variable (None when it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("mrfakename/MetaVoice-1B-v0.1", hf_token=hf_token)

# Fetch the endpoint description as a dict and print it for inspection.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
print(endpoints)

# Positional inputs, in the order the endpoint receives them.
tts_args = (
    # 'What should I say!? (max 512 characters).' Textbox (str).
    "Please surprise me and speak in whatever voice you enjoy.",
    # 'Speech Stability' Slider: float between 0.0 and 10.0.
    5,
    # 'Speaker similarity' Slider: float between 1.0 and 5.0.
    5,
    # 'Choose voice' Radio: 'Preset voices' or 'Upload target voice'.
    "Preset voices",
    # 'Preset voices' Dropdown: 'Bria', 'Alex' or 'Jacob'.
    "Bria",
    # 'Upload a clean sample to clone' Audio component (filepath); none given.
    None,
)
result = client.predict(*tts_args, api_name="/tts")
|
test_tts_parler.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client
|
3 |
+
|
4 |
+
# Smoke test: call the "/gen_tts" endpoint of the "parler-tts/parler_tts"
# Space. The token comes from the HF_TOKEN environment variable (None when
# it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("parler-tts/parler_tts", hf_token=hf_token)

# Fetch the endpoint description as a dict and print it for inspection.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
print(endpoints)

# Keyword arguments for the call. An extra positional value (3) was tried
# previously and is left disabled.
gen_tts_kwargs = {
    "text": "Please surprise me and speak in whatever voice you enjoy.",
    "description": "Elisabeth; Elisabeth's female voice; very clear audio",
}
result = client.predict(api_name="/gen_tts", **gen_tts_kwargs)
|
test_tts_parler_ex.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client
|
3 |
+
|
4 |
+
# Smoke test: call the "/gen_tts" endpoint of the
# "parler-tts/parler-tts-expresso" Space. The token comes from the HF_TOKEN
# environment variable (None when it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("parler-tts/parler-tts-expresso", hf_token=hf_token)

# Fetch the endpoint description as a dict and print it for inspection.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
print(endpoints)

# Keyword arguments for the call. An extra positional value (3) was tried
# previously and is left disabled.
gen_tts_kwargs = {
    "text": "Please surprise me and speak in whatever voice you enjoy.",
    "description": "Elisabeth; Elisabeth's female voice; very clear audio",
}
result = client.predict(api_name="/gen_tts", **gen_tts_kwargs)
|
test_tts_tortoise.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client
|
3 |
+
|
4 |
+
# Smoke test: call the "/predict" endpoint of the "Manmay/tortoise-tts" Space
# and print whatever it returns. The token comes from the HF_TOKEN
# environment variable (None when it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("Manmay/tortoise-tts", hf_token=hf_token)

# Keyword arguments for the call, gathered in one place.
predict_kwargs = {
    "text": "Please surprise me and speak in whatever voice you enjoy.",
    "script": None,
    "voice": "angie",
    "voice_b": "disabled",
    "seed": "No",
}
result = client.predict(api_name="/predict", **predict_kwargs)
print(result)
|
test_tts_voicecraft.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client
|
3 |
+
|
4 |
+
# Smoke test: call the "/run" endpoint of the "pyp1/VoiceCraft_gradio" Space.
# The token comes from the HF_TOKEN environment variable (None when it is
# unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("pyp1/VoiceCraft_gradio", hf_token=hf_token)

# Fetch the endpoint description as a dict and print it for inspection.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
print(endpoints)

# Positional inputs, in the order the endpoint receives them; each entry is
# labelled with the input name it is meant for.
run_args = (
    -1,  # seed
    0.08,  # left_margin
    0.08,  # right_margin
    16000,  # codec_audio_sr
    50,  # codec_sr
    0,  # top_k
    0.9,  # top_p
    1,  # temperature
    "3",  # stop_repetition
    4,  # sample_batch_size
    "1",  # kvcache
    "[1388,1898,131]",  # silence_tokens
    # audio_path (passed as a plain URL string)
    "https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav",
    # transcript
    "I cannot believe that the same model can also do text to speech synthesis too!",
    True,  # smart_transcript
    3.016,  # prompt_end_time
    0.46,  # edit_start_time
    3.808,  # edit_end_time
    "Newline",  # split_text
    None,  # selected_sentence
)
result = client.predict(*run_args, api_name="/run")
|
test_tts_whisper.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, file
|
3 |
+
|
4 |
+
# Smoke test: call the "/whisper_speech_demo" endpoint of the
# "collabora/WhisperSpeech" Space. The token comes from the HF_TOKEN
# environment variable (None when it is unset).
#
# Client sources that were tried before and are currently disabled:
#   Client("Pendrokar/WhisperSpeech", hf_token=os.getenv('HF_TOKEN'))
#   Client("collabora/WhisperSpeech")
#   Client(src="https://collabora-whisperspeech.hf.space", max_workers=1, hf_token=os.getenv('HF_TOKEN'))
client = Client(
    src="collabora/WhisperSpeech",
    max_workers=1,
    hf_token=os.getenv("HF_TOKEN"),
)
# To inspect the API:
# endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(endpoints)


def somefunc():
    """Do nothing; placeholder that no active call uses."""
    return None


# Keyword arguments for the active call.
#
# Disabled variants kept for reference:
#   - passing "/whisper_speech_demo" or `somefunc` as a leading positional
#   - speaker_audio=file('https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg')
#   - speaker_url=file('https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg')
#   - speaker_url=""
#   - fn_index=0
whisper_kwargs = {
    "multilingual_text": "Test.",
    "speaker_audio": None,
    "speaker_url": None,
    "cps": 14,
}
result = client.predict(api_name="/whisper_speech_demo", **whisper_kwargs)

# Disabled alternative that sends all inputs as a single list:
# result = client.predict(
#     ["Please surprise me and speak in whatever voice you enjoy.",
#      None,
#      'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav',
#      14],
#     api_name="/whisper_speech_demo",
#     fn_index=0
# )
|
test_tts_xtts.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, file
|
3 |
+
|
4 |
+
# Smoke test: call the function at fn_index=1 of the "coqui/xtts" Space.
# The token comes from the HF_TOKEN environment variable (None when it is
# unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("coqui/xtts", hf_token=hf_token)

# Endpoint description as a dict; printing is currently disabled.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
# print(endpoints)

# Positional inputs, in the order the function receives them.
xtts_args = (
    # 'What should I say!? (max 512 characters).' Textbox (str).
    "Quick test.",
    "en",  # lang
    # voice sample (passed as a plain URL string)
    "https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav",
    None,  # mic voice sample
    False,  # use_mic
    False,  # cleanup_reference
    False,  # auto_detect
    True,  # ToS
)
result = client.predict(*xtts_args, fn_index=1)
|
test_tts_xva.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, file
|
3 |
+
|
4 |
+
# Smoke test: call the "/predict" endpoint of the "Pendrokar/xVASynth-TTS"
# Space. The token comes from the HF_TOKEN environment variable (None when
# it is unset).
hf_token = os.getenv("HF_TOKEN")
client = Client("Pendrokar/xVASynth-TTS", hf_token=hf_token)

# Endpoint description as a dict; printing is currently disabled.
endpoints = client.view_api(
    all_endpoints=True,
    print_info=False,
    return_format="dict",
)
# print(endpoints)

# Positional inputs, in the order the endpoint receives them.
xva_args = (
    # 'Input Text' Textbox (str).
    "Well, hello there!!",
    # 'Voice' Radio: 'x_ex04', 'x_ex01', 'cnc_cabal',
    # 'ccby_nvidia_hifi_92_F', 'ccby_nvidia_hifi_6671_M' or 'more'.
    "x_ex04",
    # 'Language' Radio: 'en', 'de', 'es', 'hi', 'zh' or 'more'.
    "en",
    # 'Duration' Slider: float between 0.5 and 2.0.
    1.0,
    # UNUSED; 'Pitch' Slider: float between 0 and 1.0.
    0,
    # UNUSED; 'Energy' Slider: float between 0.1 and 1.0.
    0.1,
    # The four emotion sliders below (floats between 0 and 1.0) are marked
    # as overridden by DeepMoji.
    0,  # 'Anger'
    0,  # 'Happiness'
    0,  # 'Sadness'
    0,  # 'Surprise'
    # 'Use DeepMoji' Checkbox (bool).
    True,
)
result = client.predict(*xva_args, api_name="/predict")
|