import torch
import gradio as gr

from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voice
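
# Load the Tortoise model once at startup; key-value caching, DeepSpeed
# inference and fp16 (half) all reduce generation latency.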
tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
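
# Built-in voice presets grouped by gender; the voice dropdown is repopulated
# whenever the gender selection changes (see rs_change below).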
genders = ['Male', 'Female']
voices = {'Male': ['deniro', 'freeman'], 'Female': ['emma', 'angie']}


def inference(text, gender, voice, emotion, preset):
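    # Tortoise supports prompt engineering: a bracketed emotion cue prepended
    # to the text steers the delivery, and the bracketed part is trimmed from
    # the generated audio.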
    emotion_tones = {
        "Angry": "[I am so angry]",
        "Sad": "[I am so sad]",
        "Happy": "[I am so happy]",
        "Scared": "[I am so scared]",
    }
    if emotion in emotion_tones:
        text = emotion_tones[emotion] + " " + text
    texts = [text]
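
    # Load the reference clips and conditioning latents for the chosen voice.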
    voice_samples, conditioning_latents = load_voice(voice)
    audio_frames = []
    for text in texts:
        for audio_frame in tts.tts_with_preset(
            text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset=preset,
            k=1,
        ):
            audio_frames.append(torch.from_numpy(audio_frame.cpu().detach().numpy()))
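            # Concatenate everything generated so far and yield it at Tortoise's
            # native 24 kHz sample rate, so the streaming Audio component
            # updates as frames arrive.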
            complete_audio = torch.cat(audio_frames, dim=0)
            yield (24000, complete_audio.numpy())


def rs_change(rs):
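    # Swap the voice dropdown's choices to match the selected gender.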
    new_choices = voices[rs]
    return gr.update(choices=new_choices, value=new_choices[0] if new_choices else None)


title = "Tortoise TTS"

with gr.Blocks(title=title) as app:
    text = gr.Textbox(lines=4, label="Text:")
    rs = gr.Dropdown(choices=genders, value='Male', label="Gender")
    rs_hw = gr.Dropdown(choices=voices['Male'], interactive=True, label="Voice")
    rs.change(fn=rs_change, inputs=[rs], outputs=[rs_hw])
    emotion = gr.Radio(["Angry", "Sad", "Happy", "Scared"], type="value", label="Emotion")
    preset = gr.Radio(["ultra_fast", "fast", "standard", "high_quality"], type="value", value="ultra_fast", label="Preset")
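    # streaming=True lets the Audio component start playing chunks as the
    # inference generator yields them.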
    output_audio = gr.Audio(label="Streaming audio:", streaming=True, autoplay=True)
    btn = gr.Button("Generate")
    btn.click(inference, inputs=[text, rs, rs_hw, emotion, preset], outputs=[output_audio])
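
# Note: on older Gradio releases, generator (streaming) outputs may require
# enabling the queue with app.queue() before launching.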
app.launch()