Spaces:

Nuno-Tome
/

nnf_face_n_text_to_video

Build error

App Files Files Community

nnf_face_n_text_to_video / app_new.py

Nuno-Tome

no message

7f9420f 12 months ago

raw

history blame contribute delete

2.87 kB

	from pathlib import Path

	import gradio as gr
	from gradio_client import Client


	# When True, get_speech prints the raw result returned by the remote API call.
	DEBUG_MODE = True
	# NOTE(review): not referenced anywhere in the visible part of this file —
	# confirm what it is meant to switch, or remove it.
	SAS_SWITCH = True

	def get_speech(text, voice, cps=14):
	    '''
	    Function to get the speech from the text

	    For now we are using an external Space to get the result.
	    In the future we will use our own model to be more independent.

	    @params: text: str: The text to be converted to speech
	    @params: voice: str: File path of the speaker audio to be used for the speech
	    @params: cps: float: Tempo in characters per second; the remote slider
	             is documented as accepting values between 10 and 15 (default 14)
	    @return: result: Whatever the remote endpoint returns, passed through
	             unchanged (presumably a path to the generated audio — confirm)
	    '''
	    # A fresh client is created on every call, exactly as before.
	    client = Client("https://collabora-whisperspeech.hf.space/")
	    result = client.predict(
	        # str in 'Enter multilingual text📝' Textbox component
	        text,
	        # filepath in 'Upload or Record Speaker Audio (optional)🌬️💬' Audio component
	        voice,
	        "",  # str in 'alternatively, you can paste in an audio file URL:' Textbox component
	        cps,  # float (numeric value between 10 and 15) in 'Tempo (in characters per second)' Slider component
	        api_name="/whisper_speech_demo",
	    )
	    if DEBUG_MODE:
	        print(result)
	    return result

	def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
	    '''
	    Function to generate audio for a list of language-tagged text segments

	    @params: pipe: Pipeline object; must provide extract_spk_emb(),
	             default_speaker, t2s.generate(), s2a.generate() and
	             vocoder.decode()
	    @params: segments: Non-empty iterable of (lang, text) pairs
	    @params: speaker: str or Path to a speaker audio file; any other value
	             makes the function fall back to speaker_url / the default
	    @params: speaker_url: str: Speaker audio URL, used only when speaker is
	             not a str/Path
	    @params: cps: Tempo in characters per second, forwarded to t2s.generate
	    @return: The decoded audio after calling .cpu() on it
	    @raises: ValueError: when segments is empty
	    '''
	    # Materialize once so generators can be checked for emptiness and then
	    # still be transposed below.
	    segments = list(segments)
	    if not segments:
	        # Without this guard the unpacking below fails with an opaque
	        # "not enough values to unpack" ValueError.
	        raise ValueError("segments must contain at least one (lang, text) pair")

	    # - If the speaker is a string or a Path, extract the speaker embedding
	    #   from that file
	    # - else if the speaker_url is provided, extract the speaker embedding
	    #   from the url
	    # - else use the pipeline's default speaker
	    if isinstance(speaker, (str, Path)):
	        speaker = pipe.extract_spk_emb(speaker)
	    elif speaker_url:
	        speaker = pipe.extract_spk_emb(speaker_url)
	    else:
	        speaker = pipe.default_speaker

	    # Transpose [(lang, text), ...] into two parallel lists.
	    langs, texts = (list(column) for column in zip(*segments))
	    print(texts, langs)

	    stoks = pipe.t2s.generate(texts, cps=cps, lang=langs)
	    # Drop every token equal to 512 (presumably a padding/sentinel id —
	    # TODO confirm against the model's vocabulary).
	    stoks = stoks[stoks != 512]
	    atoks = pipe.s2a.generate(stoks, speaker.unsqueeze(0))
	    audio = pipe.vocoder.decode(atoks)

	    return audio.cpu()







	# Build the UI. NOTE(review): the nesting below is reconstructed — confirm
	# the intended layout of the rows/column.
	with gr.Blocks() as demo:
	    # Text to speak and the speaking-rate slider.
	    with gr.Row():
	        text_input = gr.Textbox(label="Enter multilingual text📝")
	        cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
	                        label="Speed (in characters per second)")

	    # Speaker reference: an uploaded/recorded file or a pasted URL.
	    with gr.Row(equal_height=True):
	        speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
	                                 sources=["upload", "microphone"],
	                                 type='filepath')
	        url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
	        gr.Markdown(" \n ") # fixes the bottom overflow from Audio
	    generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
	    with gr.Column(scale=1):
	        output_audio = gr.Audio(label="WhisperSpeech says…")

	    # The button previously had no event listener, so clicking it did
	    # nothing. Send the text and the speaker file to get_speech and show
	    # its result in the output player.
	    # NOTE(review): the cps slider and url_input are displayed but are not
	    # forwarded by this handler; only (text, voice) are passed.
	    generate_button.click(
	        fn=get_speech,
	        inputs=[text_input, speaker_input],
	        outputs=output_audio,
	    )



	# Start the Gradio server on a fixed port.
	demo.launch(server_port=46007)