Commit c9d5420 (1 parent: ef4b87c)
Update app.py
app.py CHANGED
@@ -187,16 +187,32 @@ pipe = StableVideoDiffusionPipeline.from_pretrained(
     variant="fp16",
 )
 pipe.to("cuda")
-
+pipe.enable_model_cpu_offload() # for smaller cost
 model_select("AnimateLCM-SVD-xt-1.1.safetensors")
-pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
+# pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) # for faster inference
 
 
+helper = DeepCacheSDHelper(pipe=pipe)
+helper.set_params(
+    # cache_interval means the frequency of feature caching, specified as the number of steps between each cache operation.
+    # with AnimateDiff this seems to have large effects, so we cannot use large values,
+    # even with cache_interval=3 I notice a big degradation in quality
+    cache_interval=2,
+
+    # cache_branch_id identifies which branch of the network (ordered from the shallowest to the deepest layer) is responsible for executing the caching processes.
+    # Note Julian: I should create my own benchmarks for this
+    cache_branch_id=0,
+
+    # Opting for a lower cache_branch_id or a larger cache_interval can lead to faster inference speed at the expense of reduced image quality
+    # (ablation experiments of these two hyperparameters can be found in the paper).
+)
+helper.enable()
+
 max_64_bit_int = 2**63 - 1
 
 def sample(
     secret_token: str,
-    input_image_base64:
+    input_image_base64: str,
     seed: Optional[int] = 42,
     randomize_seed: bool = False,
     motion_bucket_id: int = 80,
@@ -214,7 +230,7 @@ def sample(
            f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
 
     image = decode_data_uri_to_image(input_image_base64)
-
+
     print(f"seed={seed}\nrandomize_seed={randomize_seed}\nmotion_bucket_id={motion_bucket_id}\nfps_id={fps_id}\nmax_guidance_scale={max_guidance_scale}\nmin_guidance_scale={min_guidance_scale}\nwidth={width}\nheight={height}\nnum_inference_steps={num_inference_steps}\ndecoding_t={decoding_t}")
 
     if image.mode == "RGBA":
@@ -246,7 +262,7 @@ def sample(
     # Read the content of the video file and encode it to base64
     with open(video_path, "rb") as video_file:
         video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
-
+
     # Prepend the appropriate data URI header with MIME type
     return 'data:video/mp4;base64,' + video_base64
 
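For reference, a minimal sketch of the DeepCache wiring this commit introduces, assuming the DeepCache package and diffusers are installed; the model id below is an illustrative placeholder (the Space loads its actual checkpoint via model_select, which is not shown in this diff):

import torch
from diffusers import StableVideoDiffusionPipeline
from DeepCache import DeepCacheSDHelper

# Load an SVD pipeline in fp16 (placeholder model id, for illustration only).
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.enable_model_cpu_offload()  # trades some speed for a smaller GPU memory footprint

# Wrap the pipeline with DeepCache: shallow U-Net features are cached and reused
# across denoising steps instead of being recomputed at every step.
helper = DeepCacheSDHelper(pipe=pipe)
helper.set_params(
    cache_interval=2,   # refresh the cache every 2 steps; larger intervals are faster but hurt quality
    cache_branch_id=0,  # cache at the shallowest branch; lower ids favor speed over quality
)
helper.enable()

# ... call pipe(...) as usual; generation now runs with feature caching ...

helper.disable()  # restore the uncached forward pass when caching is no longer wanted

As the in-line comments in the commit note, this is a speed/quality trade-off: a larger cache_interval or a lower cache_branch_id reuses features more aggressively, and with this pipeline even cache_interval=3 was observed to visibly degrade quality, hence the conservative cache_interval=2.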
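The sample endpoint now takes the input image as a base64 data URI (input_image_base64: str) and returns the generated MP4 as one. A small client-side sketch of that round-trip using only the standard library; the two helper names are hypothetical, only the data URI formats come from the diff:

import base64

def image_to_data_uri(path: str) -> str:
    # Encode a local image (e.g. a PNG) as the data URI expected by sample()'s input_image_base64 argument.
    with open(path, "rb") as f:
        return "data:image/png;base64," + base64.b64encode(f.read()).decode("utf-8")

def save_video_from_data_uri(data_uri: str, out_path: str) -> None:
    # Strip the 'data:video/mp4;base64,' header returned by sample() and write the MP4 bytes to disk.
    _header, _, payload = data_uri.partition(",")
    with open(out_path, "wb") as f:
        f.write(base64.b64decode(payload))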