File size: 9,326 Bytes
2945355
 
 
075752f
30b0683
0aa3e03
2231aa9
0027dc5
2945355
5578821
 
2945355
 
 
30b0683
 
 
 
 
2945355
5578821
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ac1a98
5578821
 
 
1ac1a98
 
 
 
 
 
 
 
8af9162
 
 
 
4264aae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ac1a98
4264aae
2945355
eab49d0
0027dc5
 
 
01d5c02
0027dc5
8af9162
 
 
0027dc5
2945355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0aa3e03
 
8af9162
0aa3e03
 
98bce3f
 
0aa3e03
 
2945355
0aa3e03
2945355
98bce3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eab49d0
2945355
4f13edc
 
 
 
 
2231aa9
1ac1a98
4f13edc
2231aa9
8af9162
4f13edc
5578821
4f13edc
 
 
 
 
 
 
 
 
 
8af9162
 
 
4f13edc
 
8af9162
 
5578821
 
eab49d0
 
4f13edc
 
 
8af9162
 
 
 
6662b42
 
 
 
4f13edc
 
6662b42
4f13edc
6662b42
4f13edc
4fba074
5578821
 
 
 
 
 
 
 
 
 
4f13edc
8af9162
1ac1a98
 
8af9162
1ac1a98
8af9162
 
 
 
 
 
 
 
2945355
 
11a4007
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import gradio as gr
import subprocess
import os
import cv2
from huggingface_hub import hf_hub_download
import glob
from moviepy.editor import VideoFileClip
from datetime import datetime

# True only when running as the original shared Space. SPACE_ID is read with a
# default so that starting the app where the variable is not set (e.g. a local
# run) yields False instead of crashing at import time with a KeyError.
is_shared_ui = "fffiloni/X-Portrait" in os.environ.get("SPACE_ID", "")

# Download the model weights file from the Hub into the local "checkpoint"
# folder, creating that folder first when it is missing.
_checkpoint_dir = "checkpoint"
os.makedirs(_checkpoint_dir, exist_ok=True)
hf_hub_download(
    repo_id="fffiloni/X-Portrait",
    filename="model_state-415001.th",
    local_dir=_checkpoint_dir,
)

def trim_video(video_path, output_dir="trimmed_videos", max_duration=2):
    """Cap a video at ``max_duration`` seconds.

    Args:
        video_path: Path of the video file to inspect.
        output_dir: Directory that receives the trimmed copy; created if it
            does not exist.
        max_duration: Maximum allowed duration, in seconds.

    Returns:
        The path of a newly written trimmed copy when the video is longer
        than ``max_duration``; otherwise ``video_path`` unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Microsecond resolution (%f) keeps two uploads handled within the same
    # second from being written to the same output file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    output_path = os.path.join(output_dir, f"trimmed_video_{timestamp}.mp4")

    with VideoFileClip(video_path) as video:
        if video.duration > max_duration:
            # Keep only the first max_duration seconds, encoded with libx264.
            trimmed_video = video.subclip(0, max_duration)
            trimmed_video.write_videofile(output_path, codec="libx264")
            return output_path

    # Already within the limit: nothing was written, reuse the original file.
    return video_path

def load_driving_video(video_path):
    """Prepare an uploaded driving video and the data for its frame gallery.

    When ``is_shared_ui`` is true the video is first passed through
    ``trim_video``; in every case its frames are then extracted with
    ``extract_frames_with_labels``.

    Args:
        video_path: Path of the uploaded driving video.

    Returns:
        A 3-tuple: the (possibly trimmed) video path, the list returned by
        ``extract_frames_with_labels``, and a ``gr.update`` that opens the
        component it is routed to.
    """
    if is_shared_ui:
        video_path = trim_video(video_path)
        print("Path to the (trimmed) driving video:", video_path)

    frames_data = extract_frames_with_labels(video_path)

    # ``open`` is a boolean flag: pass True itself rather than the string
    # "True", which only worked because a non-empty string is truthy.
    return video_path, frames_data, gr.update(open=True)
        
def extract_frames_with_labels(video_path, base_output_dir="frames"):
    """Write every frame of a video to a JPEG file and list the results.

    Args:
        video_path: Path of the video file to read.
        base_output_dir: Parent directory; a new timestamped sub-folder is
            created inside it for this call's frames.

    Returns:
        A list of ``(frame_filename, frame_label)`` tuples in frame order,
        where ``frame_label`` is the zero-padded, four-digit frame index.

    Raises:
        ValueError: If the video file cannot be opened.
    """
    # Microsecond resolution (%f) keeps calls made within the same second
    # from sharing a folder and overwriting each other's frame files.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    output_dir = os.path.join(base_output_dir, f"frames_{timestamp}")
    os.makedirs(output_dir, exist_ok=True)

    video_capture = cv2.VideoCapture(video_path)
    if not video_capture.isOpened():
        raise ValueError(f"Cannot open video file: {video_path}")

    frame_data = []
    frame_index = 0
    try:
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break  # No frames left to read

            # The same zero-padded index is used in the filename and as label
            frame_label = f"{frame_index:04}"
            frame_filename = os.path.join(output_dir, f"frame_{frame_label}.jpg")

            cv2.imwrite(frame_filename, frame)
            frame_data.append((frame_filename, frame_label))
            frame_index += 1
    finally:
        # Release the capture even if reading or writing a frame raised.
        video_capture.release()

    return frame_data

# Define a function to run your script with selected inputs
def run_xportrait(source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps, progress=gr.Progress(track_tqdm=True)):
    """Run ``core/test_xportrait.py`` in a subprocess and report the result.

    Args:
        source_image: Path passed to the script as ``--source_image``.
        driving_video: Path passed to the script as ``--driving_video``.
        seed: Forwarded as ``--seed``.
        uc_scale: Forwarded as ``--uc_scale``.
        best_frame: Forwarded as ``--best_frame``.
        out_frames: Forwarded as ``--out_frames``.
        num_mix: Forwarded as ``--num_mix``.
        ddim_steps: Forwarded as ``--ddim_steps``.
        progress: Gradio progress tracker; not used directly in this body.

    Returns:
        A ``(status_message, video_path)`` tuple. ``video_path`` is ``None``
        when an input is missing, the script fails, or no ``.mp4`` file is
        found in the output directory.
    """
    # A missing input would put None into the argument list, and
    # subprocess.run would then raise a TypeError that the handler below does
    # not catch. Report the problem through the status message instead.
    if not source_image or not driving_video:
        return "Please provide both a source image and a driving video.", None

    # One output directory per run. Microsecond resolution (%f) keeps two runs
    # started within the same second from sharing a directory, which would
    # make the *.mp4 lookup below ambiguous.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    output_dir = f"output_{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    model_config = "config/cldm_v15_appearance_pose_local_mm.yaml"
    resume_dir = "checkpoint/model_state-415001.th"

    # Arguments are passed as a list (no shell), so paths need no quoting.
    command = [
        "python3", "core/test_xportrait.py",
        "--model_config", model_config,
        "--output_dir", output_dir,
        "--resume_dir", resume_dir,
        "--seed", str(seed),
        "--uc_scale", str(uc_scale),
        "--source_image", source_image,
        "--driving_video", driving_video,
        "--best_frame", str(best_frame),
        "--out_frames", str(out_frames),
        "--num_mix", str(num_mix),
        "--ddim_steps", str(ddim_steps),
    ]

    # Only the subprocess call is guarded: it is the statement expected to
    # raise CalledProcessError (non-zero exit status with check=True).
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        return f"An error occurred: {e}", None

    # Find the generated video file in the output directory
    video_files = glob.glob(os.path.join(output_dir, "*.mp4"))
    print(video_files)
    if not video_files:
        return "No video file was found in the output directory.", None

    final_vid = convert_video_to_h264_aac(video_files[0])
    return f"Output video saved at: {final_vid}", final_vid

def convert_video_to_h264_aac(video_path):
    """Re-encode a video with the H.264 video and AAC audio codecs.

    The result is written next to the input as ``<name>_converted.mp4``.

    Args:
        video_path: Path of the video file to convert.

    Returns:
        The path of the converted file.
    """
    original_dir = os.path.dirname(video_path)
    original_name, _ = os.path.splitext(os.path.basename(video_path))
    output_path = os.path.join(original_dir, f"{original_name}_converted.mp4")

    # Keep the scratch audio file beside the output instead of under one fixed
    # name in the working directory, so that two conversions running at the
    # same time cannot overwrite each other's temporary audio.
    temp_audio_path = os.path.join(original_dir, f"{original_name}_temp-audio.m4a")

    with VideoFileClip(video_path) as video:
        video.write_videofile(
            output_path,
            codec="libx264",                 # H.264 video codec
            audio_codec="aac",               # AAC audio codec
            temp_audiofile=temp_audio_path,  # Scratch file used while writing
            remove_temp=True,                # Delete the scratch file afterwards
        )

    return output_path

# Set up Gradio interface
# Custom CSS: make the frames gallery (elem_id="frames-gallery") scrollable.
css="""
div#frames-gallery{
    overflow: scroll!important;
}
"""

# Extract the frames of the bundled example driving video at startup.
# NOTE(review): the result is not referenced again in the visible code; the
# call still writes the example's frames to disk and raises if the asset
# cannot be opened - confirm whether it is still needed.
example_frame_data = extract_frames_with_labels("./assets/driving_video.mp4")
with gr.Blocks(css=css) as demo:
    
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# X-Portrait: Expressive Portrait Animation with Hierarchical Motion Attention")
        # The trimming notice only applies when uploads are actually trimmed,
        # i.e. when running as the shared UI (see load_driving_video).
        if is_shared_ui:
            gr.Markdown("On this shared UI, driving video input will be trimmed to 2 seconds max. Duplicate this space for more controls.")
        gr.HTML("""
        <div style="display:flex;column-gap:4px;">
            <a href='https://github.com/bytedance/X-Portrait'>
                <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
            </a> 
            <a href='https://byteaigc.github.io/x-portrait/'>
                <img src='https://img.shields.io/badge/Project-Page-green'>
            </a>
        </div>
        """)
        with gr.Row():
            # Left column: inputs and generation settings
            with gr.Column():
                with gr.Row():
                    source_image = gr.Image(label="Source Image", type="filepath")
                    driving_video = gr.Video(label="Driving Video")
                with gr.Group():
                    with gr.Row():
                        best_frame = gr.Number(value=36, label="Best Frame", info="specify the frame index in the driving video where the head pose best matches the source image (note: precision of best_frame index might affect the final quality)")
                        out_frames = gr.Number(value=-1, label="Out Frames", info="number of generation frames")
                    # Collapsed by default; opened by load_driving_video after an upload
                    with gr.Accordion("Driving video Frames", open=False) as frames_gallery_panel:
                        driving_frames = gr.Gallery(show_label=True, columns=6, height=380, elem_id="frames-gallery")
                with gr.Row():
                    seed = gr.Number(value=999, label="Seed")
                    uc_scale = gr.Number(value=5, label="UC Scale")
                with gr.Row():
                    num_mix = gr.Number(value=4, label="Number of Mix")
                    ddim_steps = gr.Number(value=30, label="DDIM Steps")
                submit_btn = gr.Button("Submit")
                
            # Right column: result, status message and a bundled example
            with gr.Column():
                video_output = gr.Video(label="Output Video")
                status = gr.Textbox(label="status")
                gr.Examples(
                    examples=[
                        ["./assets/source_image.png", "./assets/driving_video.mp4", "./assets/inference_result.mp4"]
                    ],
                    inputs=[source_image, driving_video, video_output]
                )

                gr.HTML("""
                <div style="display:flex;column-gap:4px;">
                    <a href="https://huggingface.co/spaces/fffiloni/X-Portrait?duplicate=true">
                        <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-xl.svg" alt="Duplicate this Space">
                    </a>
                    <a href="https://huggingface.co/fffiloni">
                        <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-xl-dark.svg" alt="Follow me on HF">
                    </a>
                </div>
                """)


    # On upload: (optionally) trim the video, fill the frames gallery and
    # open the accordion that contains it.
    driving_video.upload(
        fn = load_driving_video,
        inputs = [driving_video],
        outputs = [driving_video, driving_frames, frames_gallery_panel],
        queue = False
    )
    
    # On submit: run the inference script and show its status and output video.
    submit_btn.click(
        fn = run_xportrait,
        inputs = [source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps],
        outputs = [status, video_output]
    )

# Launch the Gradio app
demo.launch(ssr_mode = False)