jbilcke-hf HF staff committed on
Commit
0ff2c60
1 Parent(s): f157d20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -2
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import spaces
2
 
3
  import gradio as gr
4
  # import gradio.helpers
@@ -8,6 +7,13 @@ from glob import glob
8
  from pathlib import Path
9
  from typing import Optional
10
 
 
 
 
 
 
 
 
11
  from PIL import Image
12
  from diffusers.utils import load_image, export_to_video
13
  from pipeline import StableVideoDiffusionPipeline
@@ -16,6 +22,13 @@ import random
16
  from safetensors import safe_open
17
  from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
18
 
 
 
 
 
 
 
 
19
 
20
  def get_safetensors_files():
21
  models_dir = "./safetensors"
@@ -39,6 +52,118 @@ def model_select(selected_file):
39
  return
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  noise_scheduler = AnimateLCMSVDStochasticIterativeScheduler(
43
  num_train_timesteps=40,
44
  sigma_min=0.002,
@@ -62,7 +187,6 @@ model_select("AnimateLCM-SVD-xt-1.1.safetensors")
62
 
63
  max_64_bit_int = 2**63 - 1
64
 
65
- @spaces.GPU
66
  def sample(
67
  image: Image,
68
  seed: Optional[int] = 42,
 
 
1
 
2
  import gradio as gr
3
  # import gradio.helpers
 
7
  from pathlib import Path
8
  from typing import Optional
9
 
10
+ import tempfile
11
+ import numpy as np
12
+ import cv2
13
+ import subprocess
14
+
15
+ from DeepCache import DeepCacheSDHelper
16
+
17
  from PIL import Image
18
  from diffusers.utils import load_image, export_to_video
19
  from pipeline import StableVideoDiffusionPipeline
 
22
  from safetensors import safe_open
23
  from lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler
24
 
25
+ SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
26
+
27
+ # TODO: confirm the intended frame rate — is it 8 or 25? (we currently use 25)
28
+
29
+ hardcoded_fps = 25
30
+ hardcoded_duration_sec = 3
31
+
32
 
33
  def get_safetensors_files():
34
  models_dir = "./safetensors"
 
52
  return
53
 
54
 
55
+
56
+ # ----------------------------- FRAME INTERPOLATION ---------------------------------
57
+ # we cannot afford to use AI-based algorithms such as FILM or ST-MFNet,
58
+ # those are way too slow for AiTube which needs things to be as fast as possible
59
+ # -----------------------------------------------------------------------------------
60
+
61
def interpolate_video_frames(
    input_file_path,
    output_file_path,
    output_fps=hardcoded_fps,
    desired_duration=hardcoded_duration_sec,
    original_duration=hardcoded_duration_sec,
    output_width=None,
    output_height=None,
    use_cuda=False,  # requires an FFmpeg build compiled with CUDA support (to try - unsure the Hugging Face image has that by default)
    verbose=False):
    """Retime a video to ``desired_duration`` and motion-interpolate it to ``output_fps``.

    Uses FFmpeg's ``minterpolate`` filter (motion-compensated interpolation).
    We cannot afford AI-based algorithms such as FILM or ST-MFNet — those are
    way too slow for AiTube, which needs things to be as fast as possible.

    Parameters
    ----------
    input_file_path : str
        Path of the source video.
    output_file_path : str
        Destination path (overwritten if it already exists).
    output_fps : int
        Target frame rate.
    desired_duration, original_duration : float
        Their ratio is the time-stretch factor applied via ``setpts``.
    output_width, output_height : int or None
        Optional scaling; both must be truthy for scaling to apply.
    use_cuda : bool
        Add FFmpeg CUDA hwaccel flags (needs a CUDA-enabled FFmpeg build).
    verbose : bool
        Print parameters and the command; otherwise FFmpeg only logs errors.

    Returns
    -------
    str
        ``output_file_path`` on success, ``input_file_path`` if FFmpeg fails
        or is not installed (graceful degradation).
    """
    scale_factor = desired_duration / original_duration

    filters = []

    # Scaling if dimensions are provided.
    # note: upscaling produces disastrous results, and it will double the
    # compute time — I think that's either because we are not
    # hardware-accelerated, or because the interpolation done after it
    # becomes more computationally intensive.
    if output_width and output_height:
        filters.append(f'scale={output_width}:{output_height}')

    # note: from all fact, it looks like using a small macroblock is important
    # for us, since the video resolution is very small (usually 512x288px)
    # - `mi_mode=mci`: motion compensated interpolation
    # - `mc_mode=obmc`: overlapped block motion compensation
    # - `me=hexbs`: hexagon-based search (motion estimation method)
    # - `vsbmc=1`: variable-size block motion compensation enabled
    # - `mb_size=4`: macroblock size
    # - `fps={output_fps}`: output frame rate
    # - `scd=none`: disables scene change detection entirely
    # - `setpts={scale_factor}*PTS`: adjusts for the stretching of the duration
    interpolation_filter = (
        f'minterpolate=mi_mode=mci:mc_mode=obmc:me=hexbs:vsbmc=1:mb_size=4'
        f':fps={output_fps}:scd=none,setpts={scale_factor}*PTS'
    )
    filters.append(interpolation_filter)

    # Combine all filters into a single filter chain
    filter_complex = ','.join(filters)

    # -y: overwrite the output without asking. Without it FFmpeg prompts on
    # stdin when the destination already exists and stalls/fails (the prompt
    # is even hidden when -loglevel error is active).
    cmd = [
        'ffmpeg',
        '-y',
        '-i', input_file_path,
    ]

    # not supported by the current image, we will have to build it
    if use_cuda:
        cmd.extend(['-hwaccel', 'cuda', '-hwaccel_output_format', 'cuda'])

    cmd.extend([
        '-filter:v', filter_complex,
        '-r', str(output_fps),
        output_file_path
    ])

    # Adjust the log level based on the verbosity input
    if not verbose:
        cmd.insert(1, '-loglevel')
        cmd.insert(2, 'error')

    # Logging for debugging if verbose
    if verbose:
        print("output_fps:", output_fps)
        print("desired_duration:", desired_duration)
        print("original_duration:", original_duration)
        print("cmd:", cmd)

    try:
        subprocess.run(cmd, check=True)
        return output_file_path
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError covers a missing ffmpeg binary; in both cases we
        # degrade gracefully and return the original, un-interpolated video.
        print("Failed to interpolate video. Error:", e)
        return input_file_path  # In case of error, return original path
137
+
138
+ # ----------------------------------- VIDEO ENCODING ---------------------------------
139
+ # The Diffusers utils hardcode MP4V as a codec which is not supported by all browsers.
140
+ # This is a critical issue for AiTube so we are forced to implement our own routine.
141
+ # ------------------------------------------------------------------------------------
142
+
143
def export_to_video_file(video_frames, output_video_path=None, fps=hardcoded_fps):
    """Encode ``video_frames`` to a WebM (VP9) file that browsers can play.

    The Diffusers utils hardcode MP4V as a codec which is not supported by all
    browsers. This is a critical issue for AiTube so we are forced to
    implement our own routine.

    Parameters
    ----------
    video_frames : list of np.ndarray or PIL.Image.Image
        RGB frames; ndarray frames are assumed to be floats in [0, 1]
        (scaled by 255) — TODO confirm against the pipeline output.
    output_video_path : str or None
        Destination path; a temporary ``.webm`` path is created when None.
    fps : int
        Output frame rate.

    Returns
    -------
    str
        Path of the written video file.
    """
    if output_video_path is None:
        # delete=False keeps the path valid after the handle is closed;
        # NamedTemporaryFile(...).name alone lets the object be
        # garbage-collected, which deletes the file out from under us.
        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp:
            output_video_path = tmp.name

    # Normalize frames to uint8 RGB ndarrays.
    if isinstance(video_frames[0], np.ndarray):
        video_frames = [(frame * 255).astype(np.uint8) for frame in video_frames]
    elif isinstance(video_frames[0], Image.Image):
        video_frames = [np.array(frame) for frame in video_frames]

    # Use VP9 codec - don't freak out: yes, this will throw an exception, but this still works
    # https://stackoverflow.com/a/61116338
    # I suspect there is a bug somewhere and the actual hex code should be different
    fourcc = cv2.VideoWriter_fourcc(*'VP90')
    h, w, c = video_frames[0].shape
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h), True)

    try:
        for frame in video_frames:
            # Ensure the video frame is in the correct color format
            # (OpenCV expects BGR ordering).
            img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            video_writer.write(img)
    finally:
        # Always release the writer so the file is flushed and closed,
        # even if a frame conversion or write raises.
        video_writer.release()

    return output_video_path
166
+
167
  noise_scheduler = AnimateLCMSVDStochasticIterativeScheduler(
168
  num_train_timesteps=40,
169
  sigma_min=0.002,
 
187
 
188
  max_64_bit_int = 2**63 - 1
189
 
 
190
  def sample(
191
  image: Image,
192
  seed: Optional[int] = 42,