|
import gradio as gr |
|
from gradio_client import Client, handle_file |
|
|
|
def get_speech(text, voice):
    """Synthesize speech for *text* through the WhisperSpeech Space.

    Args:
        text: Text to be spoken; forwarded as ``multilingual_text``.
        voice: Filepath of a speaker sample used for voice cloning, or a
            falsy value (``None``/empty) when no sample was provided.

    Returns:
        Whatever the ``/whisper_speech_demo`` endpoint returns. The caller
        in this file passes it on as an audio file path -- presumably the
        path of the generated audio; confirm against the Space's API page.
    """
    client = Client("collabora/WhisperSpeech")

    # File inputs must be wrapped with handle_file() so the client uploads
    # them (same convention as get_dreamtalk). The sample is optional, so
    # only wrap it when one was actually supplied.
    speaker_audio = handle_file(voice) if voice else None

    result = client.predict(
        multilingual_text=text,
        speaker_audio=speaker_audio,
        speaker_url="",
        # NOTE(review): 14 is the hard-coded value from the original code;
        # presumably "characters per second" -- confirm with the Space.
        cps=14,
        api_name="/whisper_speech_demo",
    )
    print(result)

    return result
|
|
|
def get_dreamtalk(image_in, speech):
    """Animate a portrait with an audio track through the DreamTalk Space.

    Args:
        image_in: Filepath of the portrait image.
        speech: Filepath of the audio to drive the animation.

    Returns:
        The ``'video'`` entry of the result returned by the ``/infer``
        endpoint of the ``fffiloni/dreamtalk`` Space.
    """
    dreamtalk = Client("fffiloni/dreamtalk")

    # Both inputs are files, so they are wrapped for upload.
    audio_file = handle_file(speech)
    portrait_file = handle_file(image_in)

    prediction = dreamtalk.predict(
        audio_input=audio_file,
        image_path=portrait_file,
        emotional_style="M030_front_neutral_level1_001.mat",
        api_name="/infer",
    )
    print(prediction)

    return prediction['video']
|
|
|
def pipe(text, voice, image_in):
    """Run the full pipeline: text -> speech -> talking-head video.

    Args:
        text: Text to synthesize.
        voice: Optional filepath of a speaker sample for voice cloning.
        image_in: Filepath of the portrait image to animate.

    Returns:
        The video value produced by ``get_dreamtalk``.

    Raises:
        gr.Error: If the DreamTalk step fails for any reason; the original
            exception is attached as ``__cause__``.
    """
    speech = get_speech(text, voice)

    try:
        video = get_dreamtalk(image_in, speech)
    except Exception as err:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate, and chain the cause
        # so the real failure remains visible in the server logs.
        raise gr.Error(
            'An error occurred while loading DreamTalk: Image may not contain any face'
        ) from err

    return video
|
|
|
# UI definition: portrait + optional voice sample + text in, video out.
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""
        <h2 style="text-align: center;">
        Whisper Speech X Dreamtalk
        </h2>
        <p style="text-align: center;"></p>
        """)
        with gr.Row():
            # Left column: the portrait to animate.
            with gr.Column():
                image_in = gr.Image(
                    label="Portrait IN",
                    type="filepath",
                    value="./einstein.jpg",
                )
            # Middle column: speech inputs and the trigger button.
            with gr.Column():
                voice = gr.Audio(
                    type="filepath",
                    label="Upload or Record Speaker audio (Optional voice cloning)",
                )
                text = gr.Textbox(label="text")
                submit_btn = gr.Button('Submit')
            # Right column: the generated video.
            with gr.Column():
                video_o = gr.Video(label="Video result")

    # Wire the button to the pipeline; the argument order matches
    # pipe(text, voice, image_in).
    submit_btn.click(
        fn=pipe,
        inputs=[text, voice, image_in],
        outputs=[video_o],
        concurrency_limit=3,
    )

# Enable the request queue (at most 10 waiting requests), then start the app.
demo.queue(max_size=10)
demo.launch(show_error=True, show_api=False)