Oranblock committed
Commit 8f729c0 · verified · 1 parent: fcddaaf

Update app.py

Files changed (1)
1. app.py (+39, -31)
app.py CHANGED
@@ -1,24 +1,30 @@
 import os
-os.system("git clone https://github.com/modelscope/DiffSynth-Studio.git")
-os.system("cp -r DiffSynth-Studio/diffsynth ./")
-os.system("pip install -r DiffSynth-Studio/requirements.txt")
-from diffsynth import save_video, ModelManager, SVDVideoPipeline
-from diffsynth import ModelManager
-import torch, os, random, time
-import gradio as gr
+import torch
+import random
+import time
 import numpy as np
 from PIL import Image
+import gradio as gr
+from diffsynth import save_video, ModelManager, SVDVideoPipeline
 import spaces

+# Clone the repository and install dependencies
+os.system("git clone https://github.com/modelscope/DiffSynth-Studio.git")
+os.system("cp -r DiffSynth-Studio/diffsynth ./")
+os.system("pip install -r DiffSynth-Studio/requirements.txt")

+# Function to initialize the model pipeline
 def get_i2v_pipeline():
-    model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
-                                 model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"],
-                                 downloading_priority=["HuggingFace"])
+    model_manager = ModelManager(
+        torch_dtype=torch.float16,
+        device="cuda",
+        model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"],
+        downloading_priority=["HuggingFace"]
+    )
     pipe = SVDVideoPipeline.from_model_manager(model_manager)
     return pipe

-
+# Function to sample and generate video
 @spaces.GPU(duration=280)
 def sample(image, seed, randomize_seed, motion_bucket_id, num_inference_steps):
     if randomize_seed:
@@ -36,45 +42,47 @@ def sample(image, seed, randomize_seed, motion_bucket_id, num_inference_steps):
     save_video(video, file_path, fps=30, quality=7)
     return file_path, seed

-
+# Function to crop and resize the image
 def crop_and_resize(image):
     height = 512
     width = 512
     image = np.array(image)
     image_height, image_width, _ = image.shape
     if image_height / image_width < height / width:
-        croped_width = int(image_height / height * width)
-        left = (image_width - croped_width) // 2
-        image = image[:, left: left+croped_width]
+        cropped_width = int(image_height / height * width)
+        left = (image_width - cropped_width) // 2
+        image = image[:, left: left + cropped_width]
         image = Image.fromarray(image).convert("RGB").resize((width, height))
     else:
-        croped_height = int(image_width / width * height)
-        left = (image_height - croped_height) // 2
-        image = image[left: left+croped_height, :]
+        cropped_height = int(image_width / width * height)
+        left = (image_height - cropped_height) // 2
+        image = image[left: left + cropped_height, :]
         image = Image.fromarray(image).convert("RGB").resize((width, height))
     return image

-
+# Initialize the model pipeline
 pipe = get_i2v_pipeline()

+# Function to process examples
 def process_examples(image):
-    file_path, seed = sample(image, seed=0, randomize_seed=True, motion_bucket_id=100, num_inference_steps=25 )
+    file_path, seed = sample(image, seed=0, randomize_seed=True, motion_bucket_id=100, num_inference_steps=25)
     return file_path, seed
-
+
+# Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown('''
-# ExVideo
+# ExVideo

-ExVideo is a post-tuning technique aimed at enhancing the capability of video generation models. We have extended Stable Video Diffusion to achieve the generation of long videos up to 128 frames.
+ExVideo is a post-tuning technique aimed at enhancing the capability of video generation models. We have extended Stable Video Diffusion to achieve the generation of long videos up to 128 frames.

-This is the first model we have made public. Due to limitations in computational resources, this model was trained on about 40,000 videos using 8x A100 GPUs for approximately one week. Therefore, the model may sometimes generate content that does not conform to real-world principles. Please look forward to the release of our subsequent models.
+This is the first model we have made public. Due to limitations in computational resources, this model was trained on about 40,000 videos using 8x A100 GPUs for approximately one week. Therefore, the model may sometimes generate content that does not conform to real-world principles. Please look forward to the release of our subsequent models.

-To use this model, please refer to [DiffSynth](https://github.com/modelscope/DiffSynth-Studio).
+To use this model, please refer to [DiffSynth](https://github.com/modelscope/DiffSynth-Studio).

-* [Project Page](https://ecnu-cilab.github.io/ExVideoProjectPage/)
-* [Source Code](https://github.com/modelscope/DiffSynth-Studio)
-* [Technical report](https://arxiv.org/abs/2406.14130)
-''')
+* [Project Page](https://ecnu-cilab.github.io/ExVideoProjectPage/)
+* [Source Code](https://github.com/modelscope/DiffSynth-Studio)
+* [Technical report](https://arxiv.org/abs/2406.14130)
+''')
     with gr.Row():
         with gr.Column():
             image = gr.Image(label="Upload your image", type="pil")
@@ -85,7 +93,7 @@ To use this model, please refer to [DiffSynth](https://github.com/modelscope/Dif
         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
         motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to synthesize", value=100, minimum=0, maximum=127)
         num_inference_steps = gr.Slider(label="Inference steps", value=25, minimum=1, maximum=50)
-
+
     image.upload(fn=crop_and_resize, inputs=image, outputs=image, queue=False)
     generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, num_inference_steps], outputs=[video, seed], api_name="video")
     gr.Examples(
@@ -103,4 +111,4 @@ To use this model, please refer to [DiffSynth](https://github.com/modelscope/Dif
     )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
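
The removed/added pairs whose text looks identical (the gr.Markdown block and the final demo.launch()) appear to change only whitespace or line endings.

One side effect of the reordering is that the updated file now runs `from diffsynth import save_video, ModelManager, SVDVideoPipeline` at the top, before the os.system calls that clone DiffSynth-Studio and copy the diffsynth package into the working directory, so on a fresh Space that import would fail. A minimal sketch of one way to keep the setup ahead of the import while preserving the new layout (the os.path.isdir guard is an illustrative assumption, not part of the commit):

import os

# Fetch DiffSynth-Studio before anything imports the diffsynth package.
# The isdir guard (an illustrative addition) skips the clone on warm restarts.
if not os.path.isdir("diffsynth"):
    os.system("git clone https://github.com/modelscope/DiffSynth-Studio.git")
    os.system("cp -r DiffSynth-Studio/diffsynth ./")
    os.system("pip install -r DiffSynth-Studio/requirements.txt")

from diffsynth import save_video, ModelManager, SVDVideoPipeline  # noqa: E402

Because generate_btn.click(...) registers the endpoint with api_name="video", the running Space can also be called programmatically. A rough usage sketch with gradio_client; the Space id and the example image path are placeholders, and the positional arguments follow the inputs list wired to the button:

from gradio_client import Client, handle_file

# Placeholder Space id; replace with the actual owner/space-name.
client = Client("owner/space-name")

# Positional arguments mirror the click() inputs:
# image, seed, randomize_seed, motion_bucket_id, num_inference_steps.
video_path, used_seed = client.predict(
    handle_file("example.png"),  # placeholder input image
    42,                          # seed (re-randomized when randomize_seed=True)
    True,                        # randomize_seed
    100,                         # motion_bucket_id
    25,                          # num_inference_steps
    api_name="/video",
)
print(video_path, used_seed)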