|
|
|
|
|
from __future__ import annotations |
|
|
|
import os |
|
import sys |
|
import warnings |
|
|
|
|
|
|
|
|
|
|
|
|
|
warnings.filterwarnings("ignore") |
|
|
|
import gradio as gr |
|
|
|
from inference import InferencePipeline |
|
|
|
|
|
class InferenceUtil:
    """Helper that reads display metadata from a model's card.

    The token given at construction is stored and passed along on every
    model-card lookup.
    """

    def __init__(self, hf_token: str | None):
        """Remember the access token used for model-card lookups.

        Args:
            hf_token: Value forwarded to ``InferencePipeline.get_model_card``;
                may be ``None``.
        """
        self.hf_token = hf_token

    def load_model_info(self, model_id: str) -> tuple[str, str]:
        """Return the protagonist and training prompt stored on a model card.

        Args:
            model_id: Identifier handed to
                ``InferencePipeline.get_model_card``.

        Returns:
            A ``(protagonist, training_prompt)`` pair read from ``card.data``.
            A field that is absent from the card data is reported as ``''``.
            If obtaining the card raises any ``Exception``, ``('', '')`` is
            returned instead of propagating the error.
        """
        try:
            card = InferencePipeline.get_model_card(model_id, self.hf_token)
        except Exception:
            # Deliberate best effort: a failed lookup must not break the
            # caller, so it is reported as two empty strings.
            return '', ''

        # Only these two fields are returned; the previously computed but
        # unused ``base_model`` lookup has been dropped.
        card_data = card.data
        protagonist = getattr(card_data, 'protagonist', '')
        training_prompt = getattr(card_data, 'training_prompt', '')
        return protagonist, training_prompt
|
|
|
|
|
|
|
|
|
# Access token read from the environment; os.getenv returns None when
# HF_TOKEN is unset, and that value is forwarded unchanged to both objects.
HF_TOKEN = os.getenv('HF_TOKEN')

# `pipe.run` is the callback behind the examples, the prompt's submit event
# and the Generate button; `app` only serves the model-card lookup.
pipe = InferencePipeline(HF_TOKEN)
app = InferenceUtil(HF_TOKEN)

# NOTE(review): the nesting of the layout containers below was reconstructed
# from statement order because the original indentation was not available --
# confirm it against the intended page layout.
with gr.Blocks(css='style.css') as demo:
    # Page header: title, authors, affiliations and project links.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
        <h1 style="font-weight: 900; font-size: 2rem; margin: 0rem">
            Make-A-Protagonist:
            <br>
            Generic Video Editing with An Ensemble of Experts
        </h1>
        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
            <a href="https://yuyangzhao.com">Yuyang Zhao</a><sup>1</sup>
            <a href="https://xieenze.github.io/">Enze Xie</a><sup>2</sup>
            <a href="https://scholar.google.com.sg/citations?user=2p7x6OUAAAAJ&hl=en">Lanqing Hong</a><sup>2</sup>
            <a href="https://scholar.google.com.sg/citations?user=XboZC1AAAAAJ&hl=en">Zhenguo Li</a><sup>2</sup>
            <a href="https://www.comp.nus.edu.sg/~leegh/">Gim Hee Lee</a><sup>1</sup>
        </h2>

        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
            <sup>1 </sup>National University of Singapore
            <sup>2 </sup>Huawei Noah's Ark Lab
        </h2>

        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
            <span class="link-block">
                [<a href="https://arxiv.org/abs/2305.08850" target="_blank"
                class="external-link ">
                <span class="icon">
                    <i class="ai ai-arxiv"></i>
                </span>
                <span>arXiv</span>
                </a>]
            </span>

            <!-- Github link -->
            <span class="link-block">
                [<a href="https://github.com/Make-A-Protagonist/Make-A-Protagonist" target="_blank"
                class="external-link ">
                <span class="icon">
                    <i class="fab fa-github"></i>
                </span>
                <span>Code</span>
                </a>]
            </span>

            <!-- Github link -->
            <span class="link-block">
                [<a href="https://make-a-protagonist.github.io/" target="_blank"
                class="external-link ">
                <span class="icon">
                    <i class="fab fa-github"></i>
                </span>
                <span>Homepage</span>
                </a>]
            </span>

        </h2>
        <h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
            TL;DR: The first framework for generic video editing with both visual and textual clues.
        </h2>
        </div>
        """)

    # Usage hints shown under the header.
    gr.HTML("""
    <p>We provide a <a href="https://github.com/Make-A-Protagonist/Make-A-Protagonist/blob/main/docs/demo_guidance.md"> Demo Guidance </a> to help users to choose hyperparameters when editing videos.
    <p>You may duplicate the space and upgrade GPU for better performance and faster inference without waiting in the queue.
    <p>Alternatively, try our GitHub <a href=https://github.com/Make-A-Protagonist/Make-A-Protagonist> code </a> on your GPU.
    </p>""")

    with gr.Row():
        # Left column: every input control plus the Generate button.
        with gr.Column():
            with gr.Box():
                model_id = gr.Dropdown(
                    label='Model ID',
                    choices=[
                        'Make-A-Protagonist/ikun',
                        'Make-A-Protagonist/huaqiang',
                        'Make-A-Protagonist/yanzi',
                        'Make-A-Protagonist/car-turn',
                    ],
                    value='Make-A-Protagonist/ikun')

                # Read-only fields; refreshed by `model_id.change` further
                # down with the pair returned by `app.load_model_info`.
                with gr.Row():
                    base_model_used_for_training = gr.Textbox(
                        label='Protagonist',
                        interactive=False,
                        value='man')
                    prompt_used_for_training = gr.Textbox(
                        label='Training prompt',
                        interactive=False,
                        value='A man is playing basketball')
            with gr.Box():
                ref_image = gr.Image(
                    label='Reference Image',
                    type='pil',
                    visible=True).style(height="auto")
                ref_pro_prompt = gr.Textbox(
                    label='Reference Image Protagonist Prompt',
                    max_lines=1,
                    placeholder='Example: "man"')

            prompt = gr.Textbox(
                label='Prompt',
                max_lines=1,
                placeholder='Example: "A panda is surfing"')
            video_length = gr.Slider(
                label='Video length', minimum=4, maximum=6, step=1, value=6)
            fps = gr.Slider(
                label='FPS', minimum=1, maximum=6, step=1, value=3)
            seed = gr.Slider(
                label='Seed', minimum=0, maximum=100000, step=1, value=0)

            with gr.Accordion('ControlNet Parameters', open=True):
                control_pose = gr.Slider(
                    label='Pose', minimum=0, maximum=1, step=0.1, value=.5)
                control_depth = gr.Slider(
                    label='Depth', minimum=0, maximum=1, step=0.1, value=.5)

            with gr.Accordion('Editing Function', open=True):
                # step=1 over [0, 1] makes both sliders two-state switches.
                with gr.Row():
                    source_pro = gr.Slider(
                        label='Source Protagonist',
                        minimum=0, maximum=1, step=1, value=0)
                    source_bg = gr.Slider(
                        label='Source Background',
                        minimum=0, maximum=1, step=1, value=0)

            with gr.Accordion('Other Parameters', open=False):
                num_steps = gr.Slider(
                    label='Number of Steps',
                    minimum=0, maximum=100, step=1, value=50)

                start_step = gr.Slider(
                    label='Mask Starting Step',
                    minimum=0, maximum=100, step=1, value=0)

                guidance_scale = gr.Slider(
                    label='CFG Scale',
                    minimum=0, maximum=50, step=0.1, value=12.5)

                noise_level = gr.Slider(
                    label='Noise Level',
                    minimum=0, maximum=999, step=1, value=0)

            run_button = gr.Button('Generate')

            gr.Markdown('''
            - It takes a few minutes to download model first.
            - It takes one minute to load model and conduct DDIM inverse
            ''')
        # Right column: the generated video.
        with gr.Column():
            result = gr.Video(label='Result')

    # Components passed to `pipe.run`, in call order. Defined once and shared
    # by the examples, the prompt's submit event and the Generate button so
    # the three call sites cannot drift apart.
    inputs = [
        model_id,
        prompt,
        video_length,
        fps,
        seed,
        num_steps,
        guidance_scale,
        ref_image,
        ref_pro_prompt,
        noise_level,
        start_step,
        control_pose,
        control_depth,
        source_pro,
        source_bg,
    ]

    with gr.Row():
        # Each row lists one value per entry of `inputs`, in the same order:
        # model_id, prompt, video_length, fps, seed, num_steps,
        # guidance_scale, ref_image, ref_pro_prompt, noise_level, start_step,
        # control_pose, control_depth, source_pro, source_bg.
        examples = [
            [
                'Make-A-Protagonist/ikun',
                'A man is playing basketball on the beach, anime style.',
                6, 3, 33, 50, 12.5,
                'data/ikun/reference_images/zhongli.jpg',
                'man',
                0, 0, 0.5, 0.5, 0, 0,
            ],
            [
                'Make-A-Protagonist/huaqiang',
                'Elon Musk walking down the street.',
                6, 3, 33, 50, 12.5,
                'data/huaqiang/reference_images/musk.jpg',
                'man',
                0, 0, 0.5, 0.5, 0, 1,
            ],
            [
                'Make-A-Protagonist/yanzi',
                'A panda walking down the snowy street.',
                6, 3, 33, 50, 12.5,
                'data/yanzi/reference_images/panda.jpeg',
                'panda',
                0, 0, 0.5, 0.5, 0, 0,
            ],
            [
                'Make-A-Protagonist/car-turn',
                'A car moving in the desert.',
                6, 3, 33, 50, 12.5,
                'data/car-turn/reference_images/audi.jpeg',
                'car',
                0, 0, 0.0, 1.0, 0, 0,
            ],
            [
                'Make-A-Protagonist/car-turn',
                'A Suzuki Jimny driving down a mountain road in the rain.',
                6, 3, 33, 50, 12.5,
                'data/car-turn/images/0000.jpg',
                'car',
                0, 0, 0.0, 1.0, 1, 0,
            ],
        ]
        # Example caching is requested only when the SYSTEM environment
        # variable equals 'spaces'.
        gr.Examples(examples=examples,
                    inputs=inputs,
                    outputs=result,
                    fn=pipe.run,
                    cache_examples=os.getenv('SYSTEM') == 'spaces')

    # Choosing another model refreshes the two read-only textboxes.
    model_id.change(fn=app.load_model_info,
                    inputs=model_id,
                    outputs=[
                        base_model_used_for_training,
                        prompt_used_for_training,
                    ])

    # Submitting the prompt textbox and clicking Generate run the same call.
    prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
    run_button.click(fn=pipe.run, inputs=inputs, outputs=result)

demo.queue().launch()
|
|