import gradio as gr
import numpy as np
import tempfile
import imageio

import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

# Load the Zeroscope v2 576w text-to-video pipeline in half precision,
# swap in the multistep DPM-Solver scheduler for faster sampling, and
# offload idle submodules to CPU to reduce peak GPU memory use.
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

# Reuse the CoCa image-captioning Space as a remote prediction client.
caption = gr.load(name="spaces/fffiloni/CoCa-clone")

def create_image_caption(image_init):
    # Ask the CoCa Space (fn_index=0) to caption the image, passing its
    # decoding strategy and sampling parameters positionally.
    cap = caption(image_init, "Nucleus sampling", 1.2, 0.5, 5, 20, fn_index=0)
    print("cap: " + cap)
    return cap

def export_to_video(frames: np.ndarray, fps: int) -> str:
    # Convert float frames in [0, 1] to uint8 and write them to a
    # temporary MP4 file with imageio's FFMPEG writer.
    frames = np.clip((frames * 255), 0, 255).astype(np.uint8)
    out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    writer = imageio.get_writer(out_file.name, format="FFMPEG", fps=fps)
    for frame in frames:
        writer.append_data(frame)
    writer.close()
    return out_file.name
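
# Note: recent diffusers releases also ship a comparable helper,
# diffusers.utils.export_to_video(video_frames, output_video_path, fps=...);
# the hand-rolled writer above keeps the container and fps choice explicit.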

def infer(image_init):
    # Caption the uploaded image, then use that caption as the text prompt
    # for a 24-frame, 576x320 video sampled in 40 inference steps.
    prompt = create_image_caption(image_init)
    video_frames = pipe(prompt, num_inference_steps=40, height=320, width=576, num_frames=24).frames[0]
    video_path = export_to_video(video_frames, 12)
    print(video_path)
    return prompt, video_path

css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.animate-spin {
  animation: spin 1s linear infinite;
}

@keyframes spin {
  from {
      transform: rotate(0deg);
  }
  to {
      transform: rotate(360deg);
  }
}

#share-btn-container {
  display: flex; 
  padding-left: 0.5rem !important; 
  padding-right: 0.5rem !important; 
  background-color: #000000; 
  justify-content: center; 
  align-items: center; 
  border-radius: 9999px !important; 
  max-width: 13rem;
}

#share-btn-container:hover {
  background-color: #060606;
}

#share-btn {
  all: initial;
  color: #ffffff;
  font-weight: 600;
  cursor: pointer;
  font-family: 'IBM Plex Sans', sans-serif;
  margin-left: 0.5rem !important;
  padding-top: 0.5rem !important;
  padding-bottom: 0.5rem !important;
  right: 0;
}

#share-btn * {
  all: unset;
}

#share-btn-container div:nth-child(-n+2){
  width: auto !important;
  min-height: 0px !important;
}

#share-btn-container .wrap {
  display: none !important;
}

#share-btn-container.hidden {
  display: none !important;
}
img[src*='#center'] { 
    display: block;
    margin: auto;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            """
            <h1 style="text-align: center;">Zeroscope Image-to-Video</h1>
            <p style="text-align: center;">
            A watermark-free ModelScope-based video model, optimized for high-quality 16:9 compositions and smooth video output. <br />
            This demo is a variation that lets you upload an image as a reference for video generation.
            </p>
            
            [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center)](https://huggingface.co/spaces/fffiloni/zeroscope-img-to-video?duplicate=true)
            
            """
        )

        image_init = gr.Image(label="Image Init", type="filepath", sources=["upload"], elem_id="image-init")
        #inference_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=1, value=40, interactive=False)
        submit_btn = gr.Button("Submit")
        coca_cap = gr.Textbox(label="Caption", placeholder="CoCa Caption will be displayed here", elem_id="coca-cap-in")
        video_result = gr.Video(label="Video Output", elem_id="video-output")

    submit_btn.click(
        fn=infer,
        inputs=[image_init],
        outputs=[coca_cap, video_result],
        show_api=False
    )

demo.queue(max_size=12).launch(show_api=False)
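
# A minimal sketch for running this locally (assumes a CUDA GPU; accelerate
# is needed for enable_model_cpu_offload, and imageio needs its ffmpeg plugin):
#   pip install gradio torch diffusers transformers accelerate "imageio[ffmpeg]" numpy
#   python app.py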