Spaces:
Running
on
Zero
Running
on
Zero
update
Browse files
app.py
CHANGED
@@ -1,12 +1,95 @@
|
|
|
|
|
|
|
|
1 |
import spaces
|
2 |
import gradio as gr
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
examples = [
|
6 |
["examples/example_01.mp4", 25, 1.2, 1024, 195],
|
7 |
]
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def construct_demo():
|
11 |
with gr.Blocks(analytics_enabled=False) as depthcrafter_iface:
|
12 |
gr.Markdown(
|
@@ -29,11 +112,6 @@ def construct_demo():
|
|
29 |
<a style='font-size:18px;color: #000000' href='https://depthcrafter.github.io/'> [Project Page] </a> </div>
|
30 |
"""
|
31 |
)
|
32 |
-
# demo
|
33 |
-
depthcrafter_demo = DepthCrafterDemo(
|
34 |
-
unet_path="tencent/DepthCrafter",
|
35 |
-
pre_train_path="stabilityai/stable-video-diffusion-img2vid-xt",
|
36 |
-
)
|
37 |
|
38 |
with gr.Row(equal_height=True):
|
39 |
with gr.Column(scale=1):
|
@@ -105,12 +183,12 @@ def construct_demo():
|
|
105 |
process_length,
|
106 |
],
|
107 |
outputs=[output_video_1, output_video_2],
|
108 |
-
fn=
|
109 |
cache_examples=False,
|
110 |
)
|
111 |
|
112 |
generate_btn.click(
|
113 |
-
fn=
|
114 |
inputs=[
|
115 |
input_video,
|
116 |
num_denoising_steps,
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
import spaces
|
5 |
import gradio as gr
|
6 |
+
import torch
|
7 |
+
from diffusers.training_utils import set_seed
|
8 |
+
|
9 |
+
from depthcrafter.depth_crafter_ppl import DepthCrafterPipeline
|
10 |
+
from depthcrafter.unet import DiffusersUNetSpatioTemporalConditionModelDepthCrafter
|
11 |
+
|
12 |
+
import uuid
|
13 |
+
import random
|
14 |
+
from huggingface_hub import hf_hub_download
|
15 |
+
|
16 |
+
from depthcrafter.utils import read_video_frames, vis_sequence_depth, save_video
|
17 |
|
18 |
# Example rows offered in the demo UI; each row is fed to the example inputs
# in order (presumably video path, denoising steps, guidance scale, max
# resolution, process length -- confirm against the inputs list).
examples = [["examples/example_01.mp4", 25, 1.2, 1024, 195]]


# Load the DepthCrafter UNet in half precision.
unet = DiffusersUNetSpatioTemporalConditionModelDepthCrafter.from_pretrained(
    "tencent/DepthCrafter",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    subfolder="unet",
)
# Build the pipeline from the fp16 variant of the base checkpoint, swapping in
# the UNet loaded above.
pipe = DepthCrafterPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    variant="fp16",
    torch_dtype=torch.float16,
    unet=unet,
)
# Move the whole pipeline to the GPU once, at import time.
pipe.to("cuda")
|
36 |
+
|
37 |
+
|
38 |
+
def _normalize_depth(depth):
    """Rescale *depth* linearly to [0, 1] using its global min and max.

    A constant input (max == min) would make the min-max formula divide by
    zero and fill the result with NaNs, so it is mapped to all zeros instead.
    """
    low = depth.min()
    span = depth.max() - low
    if span == 0:
        return np.zeros_like(depth)
    return (depth - low) / span


@spaces.GPU(duration=120)
def infer_depth(
    video: str,
    num_denoising_steps: int,
    guidance_scale: float,
    max_res: int = 1024,
    process_length: int = 195,
    # remaining options keep their defaults unless a caller passes them
    save_folder: str = "./demo_output",
    window_size: int = 110,
    overlap: int = 25,
    target_fps: int = 15,
    seed: int = 42,
    track_time: bool = True,
    save_npz: bool = False,
):
    """Estimate depth for a video and write the result videos to disk.

    Args:
        video: Path of the input video file.
        num_denoising_steps: Passed to the pipeline as ``num_inference_steps``.
        guidance_scale: Passed to the pipeline as ``guidance_scale``.
        max_res: Forwarded to ``read_video_frames``.
        process_length: Forwarded to ``read_video_frames``.
        save_folder: Directory that receives the output files.
        window_size: Passed to the pipeline as ``window_size``.
        overlap: Passed to the pipeline as ``overlap``.
        target_fps: Requested FPS; replaced by the value that
            ``read_video_frames`` returns before any video is saved.
        seed: Seed handed to ``set_seed`` before inference.
        track_time: Passed to the pipeline as ``track_time``.
        save_npz: When true, also store the normalized depth as a compressed
            ``.npz`` file under the key ``depth``.

    Returns:
        A two-element list of file paths: the re-encoded input video and the
        depth visualization video.
    """
    set_seed(seed)

    frames, target_fps = read_video_frames(video, process_length, target_fps, max_res)
    print(f"==> video name: {video}, frames shape: {frames.shape}")

    # inference the depth map using the DepthCrafter pipeline
    with torch.inference_mode():
        res = pipe(
            frames,
            height=frames.shape[1],
            width=frames.shape[2],
            output_type="np",
            guidance_scale=guidance_scale,
            num_inference_steps=num_denoising_steps,
            window_size=window_size,
            overlap=overlap,
            track_time=track_time,
        ).frames[0]
    # convert the three-channel output to a single channel depth map
    res = res.sum(-1) / res.shape[-1]
    # normalize the depth map to [0, 1] across the whole video
    # (guarded so a constant depth map does not turn into NaNs)
    res = _normalize_depth(res)
    # visualize the depth map and save the results
    vis = vis_sequence_depth(res)
    # save the depth map and visualization with the target FPS
    # NOTE(review): outputs are named after the input file's basename, so two
    # requests with equally named uploads overwrite each other's files.
    save_path = os.path.join(save_folder, os.path.splitext(os.path.basename(video))[0])
    out_dir = os.path.dirname(save_path)
    if out_dir:
        # os.makedirs("") raises, which happens when save_folder is empty
        os.makedirs(out_dir, exist_ok=True)
    if save_npz:
        np.savez_compressed(save_path + ".npz", depth=res)
    save_video(res, save_path + "_depth.mp4", fps=target_fps)
    save_video(vis, save_path + "_vis.mp4", fps=target_fps)
    save_video(frames, save_path + "_input.mp4", fps=target_fps)
    return [
        save_path + "_input.mp4",
        save_path + "_vis.mp4",
        # save_path + "_depth.mp4",
    ]
|
91 |
+
|
92 |
+
|
93 |
def construct_demo():
|
94 |
with gr.Blocks(analytics_enabled=False) as depthcrafter_iface:
|
95 |
gr.Markdown(
|
|
|
112 |
<a style='font-size:18px;color: #000000' href='https://depthcrafter.github.io/'> [Project Page] </a> </div>
|
113 |
"""
|
114 |
)
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
with gr.Row(equal_height=True):
|
117 |
with gr.Column(scale=1):
|
|
|
183 |
process_length,
|
184 |
],
|
185 |
outputs=[output_video_1, output_video_2],
|
186 |
+
fn=infer_depth,
|
187 |
cache_examples=False,
|
188 |
)
|
189 |
|
190 |
generate_btn.click(
|
191 |
+
fn=infer_depth,
|
192 |
inputs=[
|
193 |
input_video,
|
194 |
num_denoising_steps,
|