wbhu-tc committed on
Commit
6654f6a
1 Parent(s): 83d4b6c
Files changed (1) hide show
  1. app.py +86 -8
app.py CHANGED
@@ -1,12 +1,95 @@
 
 
 
1
  import spaces
2
  import gradio as gr
3
- from run import DepthCrafterDemo
 
 
 
 
 
 
 
 
 
 
4
 
5
  examples = [
6
  ["examples/example_01.mp4", 25, 1.2, 1024, 195],
7
  ]
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def construct_demo():
11
  with gr.Blocks(analytics_enabled=False) as depthcrafter_iface:
12
  gr.Markdown(
@@ -29,11 +112,6 @@ def construct_demo():
29
  <a style='font-size:18px;color: #000000' href='https://depthcrafter.github.io/'> [Project Page] </a> </div>
30
  """
31
  )
32
- # demo
33
- depthcrafter_demo = DepthCrafterDemo(
34
- unet_path="tencent/DepthCrafter",
35
- pre_train_path="stabilityai/stable-video-diffusion-img2vid-xt",
36
- )
37
 
38
  with gr.Row(equal_height=True):
39
  with gr.Column(scale=1):
@@ -105,12 +183,12 @@ def construct_demo():
105
  process_length,
106
  ],
107
  outputs=[output_video_1, output_video_2],
108
- fn=depthcrafter_demo.run,
109
  cache_examples=False,
110
  )
111
 
112
  generate_btn.click(
113
- fn=depthcrafter_demo.run,
114
  inputs=[
115
  input_video,
116
  num_denoising_steps,
 
1
+ import os
2
+
3
+ import numpy as np
4
  import spaces
5
  import gradio as gr
6
+ import torch
7
+ from diffusers.training_utils import set_seed
8
+
9
+ from depthcrafter.depth_crafter_ppl import DepthCrafterPipeline
10
+ from depthcrafter.unet import DiffusersUNetSpatioTemporalConditionModelDepthCrafter
11
+
12
+ import uuid
13
+ import random
14
+ from huggingface_hub import hf_hub_download
15
+
16
+ from depthcrafter.utils import read_video_frames, vis_sequence_depth, save_video
17
 
18
  examples = [
19
  ["examples/example_01.mp4", 25, 1.2, 1024, 195],
20
  ]
21
 
22
 
23
# Load the DepthCrafter UNet weights in half precision.
unet = DiffusersUNetSpatioTemporalConditionModelDepthCrafter.from_pretrained(
    "tencent/DepthCrafter",
    subfolder="unet",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
# Build the pipeline from the Stable Video Diffusion checkpoint (fp16 variant),
# passing in the UNet loaded above.
pipe = DepthCrafterPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16",
    unet=unet,
)
# Move the pipeline to the GPU once at import time; infer_depth reuses this
# module-level object on every call.
pipe.to("cuda")
36
+
37
+
38
def _normalize_depth(depth):
    """Rescale *depth* to [0, 1] using its global minimum and maximum."""
    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        return (depth - depth_min) / depth_range
    # A constant map has no range; return zeros instead of dividing by zero
    # (which would fill the output with NaN).
    return np.zeros_like(depth)


@spaces.GPU(duration=120)
def infer_depth(
    video: str,
    num_denoising_steps: int,
    guidance_scale: float,
    max_res: int = 1024,
    process_length: int = 195,
    #
    save_folder: str = "./demo_output",
    window_size: int = 110,
    overlap: int = 25,
    target_fps: int = 15,
    seed: int = 42,
    track_time: bool = True,
    save_npz: bool = False,
):
    """Estimate depth for a video and write the result videos to disk.

    Args:
        video: Path of the input video file.
        num_denoising_steps: Passed to the pipeline as ``num_inference_steps``.
        guidance_scale: Passed to the pipeline as ``guidance_scale``.
        max_res: Forwarded to ``read_video_frames``.
        process_length: Forwarded to ``read_video_frames``.
        save_folder: Directory the output files are written to.
        window_size: Forwarded to the pipeline.
        overlap: Forwarded to the pipeline.
        target_fps: Requested FPS; replaced by the value that
            ``read_video_frames`` returns and used when saving the videos.
        seed: Seed handed to ``set_seed`` before inference.
        track_time: Forwarded to the pipeline.
        save_npz: When true, also store the normalized depth as a
            compressed ``.npz`` file.

    Returns:
        A two-element list: the path of the re-encoded input video and the
        path of the depth visualization video.
    """
    set_seed(seed)

    frames, target_fps = read_video_frames(video, process_length, target_fps, max_res)
    print(f"==> video name: {video}, frames shape: {frames.shape}")

    # inference the depth map using the DepthCrafter pipeline
    # NOTE(review): axes 1 and 2 of ``frames`` are used as height and width,
    # so frames are presumably laid out as (T, H, W, C) - confirm.
    with torch.inference_mode():
        res = pipe(
            frames,
            height=frames.shape[1],
            width=frames.shape[2],
            output_type="np",
            guidance_scale=guidance_scale,
            num_inference_steps=num_denoising_steps,
            window_size=window_size,
            overlap=overlap,
            track_time=track_time,
        ).frames[0]

    # convert the multi-channel output to a single channel depth map by
    # averaging over the last axis
    res = res.sum(-1) / res.shape[-1]
    # normalize the depth map to [0, 1] across the whole video
    res = _normalize_depth(res)
    # visualize the depth map
    vis = vis_sequence_depth(res)

    # save the depth map and visualization with the target FPS
    save_path = os.path.join(save_folder, os.path.splitext(os.path.basename(video))[0])
    if save_folder:
        # An empty folder means "current directory"; os.makedirs("") would raise.
        os.makedirs(save_folder, exist_ok=True)
    if save_npz:
        np.savez_compressed(save_path + ".npz", depth=res)
    save_video(res, save_path + "_depth.mp4", fps=target_fps)
    save_video(vis, save_path + "_vis.mp4", fps=target_fps)
    save_video(frames, save_path + "_input.mp4", fps=target_fps)

    # The raw "_depth.mp4" is written above but intentionally not returned;
    # the UI only shows the input and the visualization.
    return [
        save_path + "_input.mp4",
        save_path + "_vis.mp4",
    ]
91
+
92
+
93
  def construct_demo():
94
  with gr.Blocks(analytics_enabled=False) as depthcrafter_iface:
95
  gr.Markdown(
 
112
  <a style='font-size:18px;color: #000000' href='https://depthcrafter.github.io/'> [Project Page] </a> </div>
113
  """
114
  )
 
 
 
 
 
115
 
116
  with gr.Row(equal_height=True):
117
  with gr.Column(scale=1):
 
183
  process_length,
184
  ],
185
  outputs=[output_video_1, output_video_2],
186
+ fn=infer_depth,
187
  cache_examples=False,
188
  )
189
 
190
  generate_btn.click(
191
+ fn=infer_depth,
192
  inputs=[
193
  input_video,
194
  num_denoising_steps,