TTS-Spaces-Arena

Running on Zero

App Files Files Community

Pendrokar committed on Oct 12, 2024

Commit

dee6d0d

1 Parent(s): 7eb29b6

the test TTS python scripts

Browse files

Files changed (13) hide show

test_tts_e2_f5_e2.py +14 -0
test_tts_e2_f5_f5.py +14 -0
test_tts_edge.py +13 -0
test_tts_fish.py +21 -0
test_tts_melo.py +13 -0
test_tts_metavoice.py +15 -0
test_tts_parler.py +12 -0
test_tts_parler_ex.py +12 -0
test_tts_tortoise.py +13 -0
test_tts_voicecraft.py +29 -0
test_tts_whisper.py +34 -0
test_tts_xtts.py +17 -0
test_tts_xva.py +23 -0

test_tts_e2_f5_e2.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import os
+from gradio_client import Client, handle_file
+client = Client("mrfakename/E2-F5-TTS", hf_token=os.getenv('HF_TOKEN'))  # manual check against the E2-F5-TTS Space; HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # request the endpoint description as a dict rather than printed text
+print(endpoints)  # dump the description so the keyword names used below can be checked by eye
+result = client.predict(
+		ref_audio_orig=handle_file('https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'),  # reference audio, supplied as a URL wrapped by handle_file
+		ref_text="The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.",  # NOTE(review): presumably the transcript of the reference clip - confirm
+		gen_text="Please surprise me and speak in whatever voice you enjoy.",  # presumably the text to synthesize - confirm against the printed endpoint info
+		exp_name="E2-TTS",  # the only value that differs from test_tts_e2_f5_f5.py, which passes "F5-TTS"
+		remove_silence=False,
+		api_name="/infer",
+)  # result is assigned but never printed or inspected in this script

test_tts_e2_f5_f5.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import os
+from gradio_client import Client, handle_file
+client = Client("mrfakename/E2-F5-TTS", hf_token=os.getenv('HF_TOKEN'))  # same Space as test_tts_e2_f5_e2.py; HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # request the endpoint description as a dict rather than printed text
+print(endpoints)  # dump the description so the keyword names used below can be checked by eye
+result = client.predict(
+		ref_audio_orig=handle_file('https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'),  # reference audio, supplied as a URL wrapped by handle_file
+		ref_text="The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.",  # NOTE(review): presumably the transcript of the reference clip - confirm
+		gen_text="Please surprise me and speak in whatever voice you enjoy.",  # presumably the text to synthesize - confirm against the printed endpoint info
+		exp_name="F5-TTS",  # the only value that differs from test_tts_e2_f5_e2.py, which passes "E2-TTS"
+		remove_silence=False,
+		api_name="/infer",
+)  # result is assigned but never printed or inspected in this script

test_tts_edge.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import os
+from gradio_client import Client, file  # `file` is imported but not used anywhere in this script
+client = Client("innoai/Edge-TTS-Text-to-Speech", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # fetched but unused while the print below stays commented out
+# print(endpoints)
+result = client.predict(
+		"Please surprise me and speak in whatever voice you enjoy.",  # presumably the text to synthesize - confirm via view_api
+		"en-US-EmmaMultilingualNeural - en-US (Female)",  # voice choice, passed as the Space's display string
+		0,  # NOTE(review): meaning of this positional value is not shown here (possibly a rate adjustment) - confirm
+		0,  # NOTE(review): meaning of this positional value is not shown here (possibly a pitch adjustment) - confirm
+		api_name="/predict"
+)  # result is assigned but never printed or inspected in this script

test_tts_fish.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+from gradio_client import Client, handle_file
+client = Client("fishaudio/fish-speech-1", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+# printz = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
+# print(printz)
+result = client.predict(
+		text="Please surprise me and speak in whatever voice you enjoy.",
+		enable_reference_audio=True,  # reference audio and its text are both supplied below
+		reference_audio=handle_file('https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav'),  # supplied as a URL wrapped by handle_file
+		reference_text="In the first half of the 20th century, science fiction familiarized the world with the concept of artificially intelligent robots. It began with the “heartless” Tin man from the Wizard of Oz and continued with the humanoid robot that impersonated Maria in Metropolis. By the 1950s, we had a generation of scientists, mathematicians, and philosophers with the concept of artificial intelligence (or AI) culturally assimilated in their minds.",  # NOTE(review): presumably the transcript of the reference clip - confirm
+		max_new_tokens=1024,
+		chunk_length=200,
+		top_p=0.7,
+		repetition_penalty=1.2,
+		temperature=0.7,
+		batch_infer_num=1,
+		if_load_asr_model=False,
+		api_name="/inference_wrapper"
+)
+print(result[1])  # only the second element of the returned value is shown; what it holds is not visible here

test_tts_melo.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import os
+from gradio_client import Client
+client = Client("mrfakename/MeloTTS", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # fetched but unused while the print below stays commented out
+# print(endpoints)
+result = client.predict(
+		"Please surprise me and speak in whatever voice you enjoy.",	# str in 'Text to speak' Textbox component
+		"EN-US",	# Literal['EN-US', 'EN-BR', 'EN_INDIA', 'EN-AU', 'EN-Default'] in 'Speaker' Dropdown component
+		1.0,	# float (numeric value between 0.1 and 10.0); component name not recorded here - NOTE(review): confirm what this controls
+		"EN",	# Literal['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'] in 'Language' Radio component
+		api_name="/synthesize"
+)  # result is assigned but never printed or inspected in this script

test_tts_metavoice.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import os
+from gradio_client import Client, file  # `file` is imported but not used anywhere in this script
+client = Client("mrfakename/MetaVoice-1B-v0.1", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # request the endpoint description as a dict rather than printed text
+print(endpoints)  # dump the description so the positional order below can be checked by eye
+result = client.predict(
+        "Please surprise me and speak in whatever voice you enjoy.",	# str  in 'What should I say!? (max 512 characters).' Textbox component
+		5,	# float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
+		5,	# float (numeric value between 1.0 and 5.0) in 'Speaker similarity - How closely to match speaker identity and speech style.' Slider component; 5 is the top of the stated range
+		"Preset voices",	# Literal['Preset voices', 'Upload target voice']  in 'Choose voice' Radio component
+		"Bria",	# Literal['Bria', 'Alex', 'Jacob']  in 'Preset voices' Dropdown component
+		None,	# filepath  in 'Upload a clean sample to clone. Sample should contain 1 speaker, be between 30-90 seconds and not contain background noise.' Audio component; no upload is given since 'Preset voices' is selected above
+		api_name="/tts"
+)  # result is assigned but never printed or inspected in this script

test_tts_parler.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import os
+from gradio_client import Client
+client = Client("parler-tts/parler_tts", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # request the endpoint description as a dict rather than printed text
+print(endpoints)  # dump the description so the keyword names used below can be checked by eye
+result = client.predict(
+		text="Please surprise me and speak in whatever voice you enjoy.",
+		description="Elisabeth; Elisabeth\'s female voice; very clear audio",  # the \' escape is redundant inside double quotes; the value contains a plain apostrophe
+		# 3,
+		api_name="/gen_tts"
+)  # result is assigned but never printed or inspected in this script

test_tts_parler_ex.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import os
+from gradio_client import Client
+client = Client("parler-tts/parler-tts-expresso", hf_token=os.getenv('HF_TOKEN'))  # same call shape as test_tts_parler.py, pointed at the expresso Space
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # request the endpoint description as a dict rather than printed text
+print(endpoints)  # dump the description so the keyword names used below can be checked by eye
+result = client.predict(
+		text="Please surprise me and speak in whatever voice you enjoy.",
+		description="Elisabeth; Elisabeth\'s female voice; very clear audio",  # the \' escape is redundant inside double quotes; the value contains a plain apostrophe
+		# 3,
+		api_name="/gen_tts"
+)  # result is assigned but never printed or inspected in this script

test_tts_tortoise.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import os
+from gradio_client import Client
+client = Client("Manmay/tortoise-tts", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+result = client.predict(
+		text="Please surprise me and speak in whatever voice you enjoy.",
+		script=None,  # no script input is supplied; only `text` carries content
+		voice="angie",
+		voice_b="disabled",  # NOTE(review): presumably turns off a second voice - confirm against the Space
+		seed="No",  # NOTE(review): a string is passed for `seed`; confirm this is the value the endpoint expects
+		api_name="/predict"
+)
+print(result)  # show whatever the endpoint returned

test_tts_voicecraft.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import os
+from gradio_client import Client
+client = Client("pyp1/VoiceCraft_gradio", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # request the endpoint description as a dict rather than printed text
+print(endpoints)  # dump the description so the long positional list below can be checked by eye
+result = client.predict(
+		-1, #seed
+		0.08, #left_margin
+		0.08, #right_margin
+		16000, #codec_audio_sr
+		50, #codec_sr
+		0, #top_k
+		0.9, #top_p
+		1, #temperature
+		"3", #stop_repetition (passed as a string)
+		4, #sample_batch_size
+		"1", #kvcache (passed as a string)
+		"[1388,1898,131]", #silence_tokens (a list written out as a string)
+		'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav', #audio_path; NOTE(review): raw URL string, not wrapped in handle_file as the E2/F5 and fish scripts do - confirm the endpoint accepts it
+		"I cannot believe that the same model can also do text to speech synthesis too!", #transcript
+		True, #smart_transcript
+		3.016, #prompt_end_time
+		0.46, #edit_start_time
+		3.808, #edit_end_time
+		"Newline", #split_text
+		None, #selected_sentence
+		api_name="/run" #api_name
+)  # result is assigned but never printed or inspected in this script

test_tts_whisper.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import os
+from gradio_client import Client, file  # `file` is referenced only by the commented-out speaker_audio/speaker_url lines below
+# client = Client("Pendrokar/WhisperSpeech", hf_token=os.getenv('HF_TOKEN'))
+# client = Client("collabora/WhisperSpeech")
+# client = Client(src="https://collabora-whisperspeech.hf.space", max_workers=1, hf_token=os.getenv('HF_TOKEN'))
+client = Client(src="collabora/WhisperSpeech", max_workers=1, hf_token=os.getenv('HF_TOKEN'))  # the variant left active; the lines above are earlier alternatives kept for reference
+# endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
+# print(endpoints)
+def somefunc():  # no-op placeholder; its only reference is the commented-out positional argument below
+    pass
+result = client.predict(
+		# "/whisper_speech_demo",
+    	# somefunc,
+		multilingual_text="Test.",
+		# speaker_audio=file('https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg'),
+		speaker_audio=None,  # no speaker sample is uploaded in the active call
+		# speaker_url=file('https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg'),
+		# speaker_url="",
+		speaker_url=None,  # no speaker URL is supplied in the active call
+		cps=14,  # NOTE(review): presumably characters per second - confirm against the Space
+		api_name="/whisper_speech_demo",
+		# fn_index=0
+)  # result is assigned but never printed or inspected in this script
+# result = client.predict(
+# 		["Please surprise me and speak in whatever voice you enjoy.",
+# 		None,
+# 		'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav',
+# 		14],
+# 		api_name="/whisper_speech_demo",
+# 		fn_index=0
+# )

test_tts_xtts.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import os
+from gradio_client import Client, file  # `file` is imported but not used anywhere in this script
+client = Client("coqui/xtts", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # fetched but unused while the print below stays commented out
+# print(endpoints)
+result = client.predict(
+        "Quick test.",	# str  in 'What should I say!? (max 512 characters).' Textbox component
+        'en', #lang
+        'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample; NOTE(review): raw URL string, not wrapped in handle_file - confirm the endpoint accepts it
+        None, # mic voice sample
+        False, #use_mic
+        False, #cleanup_reference
+        False, #auto_detect
+        True, #ToS
+		fn_index=1  # endpoint is addressed by index here, unlike the other scripts, which pass api_name
+)  # result is assigned but never printed or inspected in this script

test_tts_xva.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import os
+from gradio_client import Client, file  # `file` is imported but not used anywhere in this script
+client = Client("Pendrokar/xVASynth-TTS", hf_token=os.getenv('HF_TOKEN'))  # HF_TOKEN is read from the environment (None when unset)
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # fetched but unused while the print below stays commented out
+# print(endpoints)
+result = client.predict(
+		"Well, hello there!!",	# str  in 'Input Text' Textbox component
+		"x_ex04",	# Literal['x_ex04', 'x_ex01', 'cnc_cabal', 'ccby_nvidia_hifi_92_F', 'ccby_nvidia_hifi_6671_M', 'more']  in 'Voice' Radio component
+		"en",	# Literal['en', 'de', 'es', 'hi', 'zh', 'more']  in 'Language' Radio component
+		1.0,	# float (numeric value between 0.5 and 2.0) in 'Duration' Slider component
+		0,	# UNUSED; float (numeric value between 0 and 1.0) in 'Pitch' Slider component
+		0.1,	# UNUSED; float (numeric value between 0.1 and 1.0) in 'Energy' Slider component
+		0,	# Overridden by DeepMoji; float (numeric value between 0 and 1.0) in '😠 Anger' Slider component
+		0,	# Overridden by DeepMoji; float (numeric value between 0 and 1.0) in '😃 Happiness' Slider component
+		0,	# Overridden by DeepMoji; float (numeric value between 0 and 1.0) in '😭 Sadness' Slider component
+		0,	# Overridden by DeepMoji; float (numeric value between 0 and 1.0) in '😮 Surprise' Slider component
+		True,	# bool  in 'Use DeepMoji' Checkbox component
+		api_name="/predict"
+)  # result is assigned but never printed or inspected in this script