"""Gradio demo that chains the WhisperSpeech and DreamTalk Spaces.

Text is synthesized to speech by the "collabora/WhisperSpeech" Space, and the
resulting audio plus a portrait image are sent to the "fffiloni/dreamtalk"
Space, whose video result is shown in the UI.
"""

import gradio as gr
from gradio_client import Client, handle_file


def get_speech(text, voice):
    """Synthesize ``text`` through the WhisperSpeech Space.

    Args:
        text: Text to speak.
        voice: Path to a reference speaker recording, or ``None``/empty when
            the user did not provide one (the UI labels this input optional).

    Returns:
        Whatever the ``/whisper_speech_demo`` endpoint returns. It is later
        passed to ``handle_file`` by ``get_dreamtalk``, so it is presumably a
        path to the generated audio file.
    """
    client = Client("collabora/WhisperSpeech")
    # handle_file() rejects a missing path, so only wrap the recording when
    # one was actually supplied.
    # NOTE(review): assumes the remote endpoint accepts None for
    # speaker_audio and falls back to its default voice - confirm.
    speaker_audio = handle_file(voice) if voice else None
    result = client.predict(
        multilingual_text=text,
        speaker_audio=speaker_audio,
        speaker_url="",
        cps=14,
        api_name="/whisper_speech_demo",
    )
    print(result)
    return result


def get_dreamtalk(image_in, speech):
    """Animate a portrait with the given speech through the DreamTalk Space.

    Args:
        image_in: Path to the portrait image.
        speech: Path to the driving audio file.

    Returns:
        The ``'video'`` entry of the ``/infer`` endpoint's result.
    """
    client = Client("fffiloni/dreamtalk")
    result = client.predict(
        audio_input=handle_file(speech),
        image_path=handle_file(image_in),
        emotional_style="M030_front_neutral_level1_001.mat",
        api_name="/infer",
    )
    print(result)
    return result['video']


def pipe(text, voice, image_in):
    """Run the full text -> speech -> talking-head video pipeline.

    Args:
        text: Text to speak.
        voice: Optional path to a reference speaker recording.
        image_in: Path to the portrait image.

    Returns:
        The video value produced by ``get_dreamtalk``.

    Raises:
        gr.Error: If the DreamTalk step fails for any reason.
    """
    speech = get_speech(text, voice)

    try:
        video = get_dreamtalk(image_in, speech)
    except Exception as err:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate, and chain the cause for the server log.
        raise gr.Error(
            'An error occurred while loading DreamTalk: Image may not contain any face'
        ) from err

    return video


with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""

Whisper Speech X Dreamtalk

""")
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(
                    label="Portrait IN",
                    type="filepath",
                    value="./einstein.jpg",
                )
            with gr.Column():
                voice = gr.Audio(
                    type="filepath",
                    label="Upload or Record Speaker audio (Optional voice cloning)",
                )
                text = gr.Textbox(label="text")
                submit_btn = gr.Button('Submit')
            with gr.Column():
                video_o = gr.Video(label="Video result")

    submit_btn.click(
        fn=pipe,
        inputs=[text, voice, image_in],
        outputs=[video_o],
        concurrency_limit=3,
    )

demo.queue(max_size=10).launch(show_error=True, show_api=False)