Spaces:
Build error
Build error
import gradio as gr | |
from gradio_client import Client | |
# When True, get_speech prints the raw result returned by the remote Space.
DEBUG_MODE = True
# NOTE(review): SAS_SWITCH is not read anywhere in the visible source; its
# purpose cannot be determined from here -- confirm its meaning or remove it.
SAS_SWITCH = True
def get_speech(text, voice, speaker_url="", cps=14):
    """Convert ``text`` to speech via the hosted WhisperSpeech Space.

    For now the work is delegated to the external
    ``collabora-whisperspeech`` Hugging Face Space; the plan is to run our
    own model in the future to be more independent.

    Args:
        text: The text to be converted to speech (sent to the Space's
            multilingual text box).
        voice: File path of the speaker audio to use for the speech (sent to
            the Space's optional "Upload or Record Speaker Audio" input).
        speaker_url: Audio file URL sent to the Space's alternative speaker
            input. Defaults to ``""``, the value that was previously
            hard-coded.
        cps: Tempo in characters per second. The Space's slider accepts
            values between 10 and 15. Defaults to 14, the value that was
            previously hard-coded.

    Returns:
        The value returned by ``Client.predict`` for the
        ``/whisper_speech_demo`` endpoint, unmodified.
        NOTE(review): presumably a path to the generated audio file --
        confirm against the Space's API page.
    """
    # A new client is created on every call, exactly as before.
    client = Client("https://collabora-whisperspeech.hf.space/")
    result = client.predict(
        text,         # str in 'Enter multilingual text📝' Textbox component
        voice,        # filepath in 'Upload or Record Speaker Audio (optional)🌬️💬' Audio component
        speaker_url,  # str in 'alternatively, you can paste in an audio file URL:' Textbox component
        cps,          # float (numeric value between 10 and 15) in 'Tempo (in characters per second)' Slider component
        api_name="/whisper_speech_demo",
    )
    if DEBUG_MODE:
        print(result)
    return result
def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
    """Generate audio for language-tagged text segments with a local pipeline.

    Args:
        pipe: Pipeline object. This function uses its ``extract_spk_emb``,
            ``default_speaker``, ``t2s.generate``, ``s2a.generate`` and
            ``vocoder.decode`` members.
        segments: Iterable of ``(lang, text)`` pairs; must not be empty.
        speaker: A ``str`` or ``Path`` from which a speaker embedding is
            extracted. Any other value (e.g. ``None``) falls through to
            ``speaker_url`` / the default speaker.
        speaker_url: Used as the embedding source when ``speaker`` is not a
            ``str``/``Path`` and this value is truthy.
        cps: Tempo in characters per second, forwarded to ``t2s.generate``.

    Returns:
        The result of calling ``.cpu()`` on the vocoder output.

    Raises:
        ValueError: If ``segments`` is empty.
    """
    # Imported locally because ``Path`` is not imported at module level in
    # this file; without it the isinstance check below raises NameError.
    from pathlib import Path

    # - If the speaker is a string or a file path, extract the speaker
    #   embedding from that file.
    # - Else, if speaker_url is provided, extract the embedding from the url.
    # - Else, use the pipeline's default speaker.
    if isinstance(speaker, (str, Path)):
        speaker = pipe.extract_spk_emb(speaker)
    elif speaker_url:
        speaker = pipe.extract_spk_emb(speaker_url)
    else:
        speaker = pipe.default_speaker

    # Materialize once so emptiness can be checked even for one-shot iterables.
    pairs = list(segments)
    if not pairs:
        raise ValueError("segments must contain at least one (lang, text) pair")

    # Transpose [(lang, text), ...] into parallel lists of languages and texts.
    langs, texts = [list(column) for column in zip(*pairs)]
    print(texts, langs)

    stoks = pipe.t2s.generate(texts, cps=cps, lang=langs)
    # Drop every token equal to 512.
    # NOTE(review): presumably 512 is a padding/end marker -- confirm.
    stoks = stoks[stoks != 512]
    atoks = pipe.s2a.generate(stoks, speaker.unsqueeze(0))
    audio = pipe.vocoder.decode(atoks)
    return audio.cpu()
# Gradio UI: text and tempo inputs, optional speaker audio / URL inputs,
# a generate button and an audio output.
with gr.Blocks() as demo:
    with gr.Row():
        text_input = gr.Textbox(label="Enter multilingual text📝")
        cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
                        label="Speed (in characters per second)")
    with gr.Row(equal_height=True):
        speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
                                 sources=["upload", "microphone"],
                                 type='filepath')
        url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
    gr.Markdown(" \n ")  # fixes the bottom overflow from Audio
    generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
    with gr.Column(scale=1):
        output_audio = gr.Audio(label="WhisperSpeech says…")

    # The button previously had no event listener, so clicking it did
    # nothing. Only the text and the speaker audio are forwarded to the
    # handler; the tempo slider and the URL box are not wired to it.
    generate_button.click(get_speech,
                          inputs=[text_input, speaker_input],
                          outputs=output_audio)

# Start the web server on a fixed port.
demo.launch(server_port=46007)