jbilcke-hf HF staff committed on
Commit
0ff2c60
1 Parent(s): f157d20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -2
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import spaces
2
 
3
  import gradio as gr
4
  # import gradio.helpers
@@ -8,6 +7,13 @@ from glob import glob
8
  from pathlib import Path
9
  from typing import Optional
10
 
 
 
 
 
 
 
 
11
  from PIL import Image
12
  from diffusers.utils import load_image, export_to_video
13
  from pipeline import StableVideoDiffusionPipeline
@@ -16,6 +22,13 @@ import random
16
  from safetensors import safe_open
17
  from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
18
 
 
 
 
 
 
 
 
19
 
20
  def get_safetensors_files():
21
  models_dir = "./safetensors"
@@ -39,6 +52,118 @@ def model_select(selected_file):
39
  return
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  noise_scheduler = AnimateLCMSVDStochasticIterativeScheduler(
43
  num_train_timesteps=40,
44
  sigma_min=0.002,
@@ -62,7 +187,6 @@ model_select("AnimateLCM-SVD-xt-1.1.safetensors")
62
 
63
  max_64_bit_int = 2**63 - 1
64
 
65
- @spaces.GPU
66
  def sample(
67
  image: Image,
68
  seed: Optional[int] = 42,
 
 
1
 
2
  import gradio as gr
3
  # import gradio.helpers
 
7
  from pathlib import Path
8
  from typing import Optional
9
 
10
+ import tempfile
11
+ import numpy as np
12
+ import cv2
13
+ import subprocess
14
+
15
+ from DeepCache import DeepCacheSDHelper
16
+
17
  from PIL import Image
18
  from diffusers.utils import load_image, export_to_video
19
  from pipeline import StableVideoDiffusionPipeline
 
22
  from safetensors import safe_open
23
  from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
24
 
25
+ SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
26
+
27
+ # TODO: confirm the intended frame rate — is it 8 or 25? (we currently use 25)
28
+
29
+ hardcoded_fps = 25
30
+ hardcoded_duration_sec = 3
31
+
32
 
33
  def get_safetensors_files():
34
  models_dir = "./safetensors"
 
52
  return
53
 
54
 
55
+
56
+ # ----------------------------- FRAME INTERPOLATION ---------------------------------
57
+ # we cannot afford to use AI-based algorithms such as FILM or ST-MFNet,
58
+ # those are way too slow for AiTube which needs things to be as fast as possible
59
+ # -----------------------------------------------------------------------------------
60
+
61
def interpolate_video_frames(
    input_file_path,
    output_file_path,
    output_fps=hardcoded_fps,
    desired_duration=hardcoded_duration_sec,
    original_duration=hardcoded_duration_sec,
    output_width=None,
    output_height=None,
    use_cuda=False,  # requires an FFmpeg build compiled with CUDA support (to try - unsure the Hugging Face image has that by default)
    verbose=False):
    """Retime a video to ``desired_duration`` and motion-interpolate it to ``output_fps``.

    Uses FFmpeg's ``minterpolate`` filter (motion-compensated interpolation).
    We cannot afford AI-based algorithms such as FILM or ST-MFNet — those are
    way too slow for AiTube, which needs things to be as fast as possible.

    Parameters
    ----------
    input_file_path : str
        Path of the source video.
    output_file_path : str
        Destination path (overwritten if it already exists).
    output_fps : int
        Target frame rate.
    desired_duration, original_duration : float
        Their ratio is the time-stretch factor applied via ``setpts``.
    output_width, output_height : int or None
        Optional scaling; both must be truthy for scaling to apply.
    use_cuda : bool
        Add FFmpeg CUDA hwaccel flags (needs a CUDA-enabled FFmpeg build).
    verbose : bool
        Print parameters and the command; otherwise FFmpeg only logs errors.

    Returns
    -------
    str
        ``output_file_path`` on success, ``input_file_path`` if FFmpeg fails
        or is not installed (graceful degradation).
    """
    scale_factor = desired_duration / original_duration

    filters = []

    # Scaling if dimensions are provided.
    # note: upscaling produces disastrous results, and it will double the
    # compute time — I think that's either because we are not
    # hardware-accelerated, or because the interpolation done after it
    # becomes more computationally intensive.
    if output_width and output_height:
        filters.append(f'scale={output_width}:{output_height}')

    # note: from all fact, it looks like using a small macroblock is important
    # for us, since the video resolution is very small (usually 512x288px)
    # - `mi_mode=mci`: motion compensated interpolation
    # - `mc_mode=obmc`: overlapped block motion compensation
    # - `me=hexbs`: hexagon-based search (motion estimation method)
    # - `vsbmc=1`: variable-size block motion compensation enabled
    # - `mb_size=4`: macroblock size
    # - `fps={output_fps}`: output frame rate
    # - `scd=none`: disables scene change detection entirely
    # - `setpts={scale_factor}*PTS`: adjusts for the stretching of the duration
    interpolation_filter = (
        f'minterpolate=mi_mode=mci:mc_mode=obmc:me=hexbs:vsbmc=1:mb_size=4'
        f':fps={output_fps}:scd=none,setpts={scale_factor}*PTS'
    )
    filters.append(interpolation_filter)

    # Combine all filters into a single filter chain
    filter_complex = ','.join(filters)

    # -y: overwrite the output without asking. Without it FFmpeg prompts on
    # stdin when the destination already exists and stalls/fails (the prompt
    # is even hidden when -loglevel error is active).
    cmd = [
        'ffmpeg',
        '-y',
        '-i', input_file_path,
    ]

    # not supported by the current image, we will have to build it
    if use_cuda:
        cmd.extend(['-hwaccel', 'cuda', '-hwaccel_output_format', 'cuda'])

    cmd.extend([
        '-filter:v', filter_complex,
        '-r', str(output_fps),
        output_file_path
    ])

    # Adjust the log level based on the verbosity input
    if not verbose:
        cmd.insert(1, '-loglevel')
        cmd.insert(2, 'error')

    # Logging for debugging if verbose
    if verbose:
        print("output_fps:", output_fps)
        print("desired_duration:", desired_duration)
        print("original_duration:", original_duration)
        print("cmd:", cmd)

    try:
        subprocess.run(cmd, check=True)
        return output_file_path
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError covers a missing ffmpeg binary; in both cases we
        # degrade gracefully and return the original, un-interpolated video.
        print("Failed to interpolate video. Error:", e)
        return input_file_path  # In case of error, return original path
137
+
138
+ # ----------------------------------- VIDEO ENCODING ---------------------------------
139
+ # The Diffusers utils hardcode MP4V as a codec which is not supported by all browsers.
140
+ # This is a critical issue for AiTube so we are forced to implement our own routine.
141
+ # ------------------------------------------------------------------------------------
142
+
143
def export_to_video_file(video_frames, output_video_path=None, fps=hardcoded_fps):
    """Encode ``video_frames`` to a WebM (VP9) file that browsers can play.

    The Diffusers utils hardcode MP4V as a codec which is not supported by all
    browsers. This is a critical issue for AiTube so we are forced to
    implement our own routine.

    Parameters
    ----------
    video_frames : list of np.ndarray or PIL.Image.Image
        RGB frames; ndarray frames are assumed to be floats in [0, 1]
        (scaled by 255) — TODO confirm against the pipeline output.
    output_video_path : str or None
        Destination path; a temporary ``.webm`` path is created when None.
    fps : int
        Output frame rate.

    Returns
    -------
    str
        Path of the written video file.
    """
    if output_video_path is None:
        # delete=False keeps the path valid after the handle is closed;
        # NamedTemporaryFile(...).name alone lets the object be
        # garbage-collected, which deletes the file out from under us.
        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp:
            output_video_path = tmp.name

    # Normalize frames to uint8 RGB ndarrays.
    if isinstance(video_frames[0], np.ndarray):
        video_frames = [(frame * 255).astype(np.uint8) for frame in video_frames]
    elif isinstance(video_frames[0], Image.Image):
        video_frames = [np.array(frame) for frame in video_frames]

    # Use VP9 codec - don't freak out: yes, this will throw an exception, but this still works
    # https://stackoverflow.com/a/61116338
    # I suspect there is a bug somewhere and the actual hex code should be different
    fourcc = cv2.VideoWriter_fourcc(*'VP90')
    h, w, c = video_frames[0].shape
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h), True)

    try:
        for frame in video_frames:
            # Ensure the video frame is in the correct color format
            # (OpenCV expects BGR ordering).
            img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            video_writer.write(img)
    finally:
        # Always release the writer so the file is flushed and closed,
        # even if a frame conversion or write raises.
        video_writer.release()

    return output_video_path
166
+
167
  noise_scheduler = AnimateLCMSVDStochasticIterativeScheduler(
168
  num_train_timesteps=40,
169
  sigma_min=0.002,
 
187
 
188
  max_64_bit_int = 2**63 - 1
189
 
 
190
  def sample(
191
  image: Image,
192
  seed: Optional[int] = 42,