# Page residue captured together with this file (not program code):
# Nuno-Tome's picture
# no message
# 7f9420f
import gradio as gr
from gradio_client import Client
# When True, get_speech() prints the raw result returned by the remote endpoint.
DEBUG_MODE = True
# NOTE(review): not referenced anywhere in the visible code; purpose unclear,
# confirm before relying on it.
SAS_SWITCH = True
def get_speech(text, voice, speaker_url="", cps=14):
    """Convert ``text`` to speech through the hosted WhisperSpeech Space.

    For now the work is delegated to an external Space; the plan is to use
    our own model in the future to be more independent.

    Args:
        text: The text to be converted to speech (sent to the Space's
            'Enter multilingual text' textbox).
        voice: File path of the speaker audio to imitate (sent to the
            Space's optional 'Upload or Record Speaker Audio' input).
        speaker_url: Audio file URL the Space accepts as an alternative to
            ``voice``. Defaults to ``""`` (no URL), the value that was
            previously hard-coded.
        cps: Tempo in characters per second. The remote slider is described
            as a numeric value between 10 and 15. Defaults to 14, the value
            that was previously hard-coded.

    Returns:
        The value returned by the ``/whisper_speech_demo`` endpoint
        (presumably a path to the generated audio file; confirm against
        the Space's API page).
    """
    client = Client("https://collabora-whisperspeech.hf.space/")
    result = client.predict(
        # str in 'Enter multilingual text📝' Textbox component
        text,
        # filepath in 'Upload or Record Speaker Audio (optional)🌬️💬' Audio component
        voice,
        # str in 'alternatively, you can paste in an audio file URL:' Textbox component
        speaker_url,
        # float (numeric value between 10 and 15) in 'Tempo (in characters per second)' Slider component
        cps,
        api_name="/whisper_speech_demo",
    )
    if DEBUG_MODE:
        print(result)
    return result
def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
    """Generate audio for language-tagged text segments using ``pipe``.

    Speaker selection:
      - if ``speaker`` is a ``str`` or ``pathlib.Path``, the speaker
        embedding is extracted from it via ``pipe.extract_spk_emb``;
      - otherwise, if ``speaker_url`` is truthy, the embedding is extracted
        from the URL;
      - otherwise ``pipe.default_speaker`` is used.

    Args:
        pipe: Pipeline object exposing ``extract_spk_emb``,
            ``default_speaker``, ``t2s.generate``, ``s2a.generate`` and
            ``vocoder.decode``.
        segments: Iterable of ``(lang, text)`` pairs.
        speaker: Speaker audio file path, or any non-path value to fall
            through to ``speaker_url`` / the default speaker.
        speaker_url: URL of a speaker audio file, or a falsy value.
        cps: Tempo in characters per second, forwarded to
            ``pipe.t2s.generate``.

    Returns:
        The result of ``pipe.vocoder.decode(...)`` after calling ``.cpu()``
        on it.

    Raises:
        ValueError: If ``segments`` is empty.
    """
    # Local import: the module's top-level imports do not provide ``Path``,
    # so the isinstance check below would otherwise raise NameError.
    from pathlib import Path

    pairs = list(segments)
    if not pairs:
        # zip(*[]) yields nothing, which used to surface as a cryptic
        # "not enough values to unpack" ValueError.
        raise ValueError("segments must contain at least one (lang, text) pair")

    if isinstance(speaker, (str, Path)):
        speaker = pipe.extract_spk_emb(speaker)
    elif speaker_url:
        speaker = pipe.extract_spk_emb(speaker_url)
    else:
        speaker = pipe.default_speaker

    # Transpose [(lang, text), ...] into two parallel lists.
    langs, texts = [list(column) for column in zip(*pairs)]
    print(texts, langs)

    stoks = pipe.t2s.generate(texts, cps=cps, lang=langs)
    # Drop every token equal to 512 (presumably a padding/stop token id;
    # TODO confirm against the model's vocabulary).
    stoks = stoks[stoks != 512]
    atoks = pipe.s2a.generate(stoks, speaker.unsqueeze(0))
    audio = pipe.vocoder.decode(atoks)
    return audio.cpu()
# Gradio UI: text + tempo inputs, optional speaker audio/URL, a generate
# button and an audio output.
# NOTE(review): the nesting of the layout containers below is a best-effort
# reading of the statement order; confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        text_input = gr.Textbox(label="Enter multilingual text📝")
        cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
                        label="Speed (in characters per second)")
    with gr.Row(equal_height=True):
        speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
                                 sources=["upload", "microphone"],
                                 type='filepath')
        url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
    gr.Markdown(" \n ")  # fixes the bottom overflow from Audio
    generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
    with gr.Column(scale=1):
        output_audio = gr.Audio(label="WhisperSpeech says…")

    # Without an event listener the button did nothing and get_speech was
    # never invoked. Only the two arguments get_speech requires are passed;
    # url_input and cps are not forwarded to the handler yet.
    generate_button.click(
        fn=get_speech,
        inputs=[text_input, speaker_input],
        outputs=output_audio,
    )

demo.launch(server_port=46007)