File size: 1,932 Bytes
7572555
340d310
7572555
 
340d310
 
7572555
340d310
8d7bcfc
340d310
 
 
7572555
 
340d310
7572555
 
 
340d310
7572555
340d310
 
 
 
7572555
 
 
 
 
 
 
1242299
 
 
6c305c0
 
 
7572555
 
 
 
 
 
60f71bf
 
 
 
7572555
 
 
7d1064d
1e7779c
84ddd8e
1e7779c
7572555
 
6b8c3fc
7572555
 
 
 
 
 
 
2c65526
 
7572555
2c65526
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
from gradio_client import Client, handle_file

def get_speech(text, voice):
    """Synthesize speech for *text* via the remote WhisperSpeech Space.

    Args:
        text: The text to synthesize.
        voice: Filepath to a reference audio clip for voice cloning, or
            None/"" when the user did not provide one (the UI marks this
            input as optional).

    Returns:
        The result returned by the Space's ``/whisper_speech_demo``
        endpoint — presumably a filepath to the generated audio; verify
        against the Space's API docs.
    """
    client = Client("collabora/WhisperSpeech")
    result = client.predict(
        multilingual_text=text,
        # Fix: handle_file(None) raises, but the voice input is optional
        # in the UI — only wrap it when an audio file was actually given.
        speaker_audio=handle_file(voice) if voice else None,
        speaker_url="",
        cps=14,  # characters per second pacing expected by the endpoint
        api_name="/whisper_speech_demo"
    )
    print(result)

    return result

def get_dreamtalk(image_in, speech):
    """Animate the portrait at *image_in* lip-synced to the *speech* audio.

    Calls the remote ``fffiloni/dreamtalk`` Space's ``/infer`` endpoint
    with a fixed neutral emotional style and returns the path/URL of the
    produced video.
    """
    dreamtalk_client = Client("fffiloni/dreamtalk")
    response = dreamtalk_client.predict(
        audio_input=handle_file(speech),
        image_path=handle_file(image_in),
        emotional_style="M030_front_neutral_level1_001.mat",
        api_name="/infer",
    )
    print(response)
    return response['video']

def pipe(text, voice, image_in):
    """Full pipeline: synthesize speech from *text*/*voice*, then animate
    the portrait *image_in* with it.

    Args:
        text: Text to speak.
        voice: Optional reference audio filepath for voice cloning.
        image_in: Filepath of the portrait image to animate.

    Returns:
        Path/URL of the generated talking-head video.

    Raises:
        gr.Error: If the DreamTalk step fails (e.g. no face detected in
            the image), so the message surfaces in the Gradio UI.
    """
    speech = get_speech(text, voice)

    try:
        video = get_dreamtalk(image_in, speech)
    # Fix: a bare `except:` also swallowed KeyboardInterrupt/SystemExit
    # and discarded the underlying error. Catch Exception and chain the
    # cause so the real failure stays visible in server logs.
    except Exception as e:
        raise gr.Error('An error occurred while loading DreamTalk: Image may not contain any face') from e

    return video

# Gradio UI: a three-column layout (portrait input | voice+text+submit |
# video output). Component creation order inside the context managers is
# layout-significant, so the structure is left untouched.
with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""
        <h2 style="text-align: center;">
        Whisper Speech X Dreamtalk
        </h2>
        <p style="text-align: center;"></p>
        """)
        with gr.Row():
            with gr.Column():
                # Portrait to animate; ships with a default sample image.
                image_in = gr.Image(label="Portrait IN", type="filepath", value="./einstein.jpg")
            with gr.Column():
                # Optional reference audio for voice cloning (may be None).
                voice = gr.Audio(type="filepath", label="Upload or Record Speaker audio (Optional voice cloning)")
                text = gr.Textbox(label="text")
                submit_btn = gr.Button('Submit')
            with gr.Column():
                video_o = gr.Video(label="Video result")
    # Wire the button to the full TTS -> talking-head pipeline; at most
    # 3 concurrent runs of this event are allowed.
    submit_btn.click(
        fn = pipe,
        inputs = [
            text, voice, image_in
        ],
        outputs = [
            video_o
        ],
        concurrency_limit = 3
    )
# Queue up to 10 pending requests; surface errors in the UI and hide the
# auto-generated API page.
demo.queue(max_size=10).launch(show_error=True, show_api=False)