import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, DDIMScheduler
from utils import *
# load sd model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = "stabilityai/stable-diffusion-2-1-base"
inv_pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(device)
inv_pipe.scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler")
def randomize_seed_fn():
seed = random.randint(0, np.iinfo(np.int32).max)
return seed
def reset_do_inversion():
return True
def preprocess_and_invert(video,
frames,
latents,
inverted_latents,
seed,
randomize_seed,
do_inversion,
height:int = 512,
weidth: int = 512,
# save_dir: str = "latents",
steps: int = 500,
batch_size: int = 8,
# save_steps: int = 50,
n_frames: int = 40,
inversion_prompt:str = ''
):
if do_inversion or randomize_seed:
# save_video_frames(data_path, img_size=(height, weidth))
frames = video_to_frames(video, img_size=(height, weidth))
# data_path = os.path.join('data', Path(video_path).stem)
toy_scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler")
toy_scheduler.set_timesteps(save_steps)
timesteps_to_save, num_inference_steps = get_timesteps(toy_scheduler, num_inference_steps=save_steps,
strength=1.0,
device=device)
if randomize_seed:
seed = randomize_seed_fn()
seed_everything(seed)
frames, latents = get_data(inv_pipe, frames, n_frames)
inverted_latents = extract_latents(inv_pipe, num_steps = steps,
latent_frames = latents,
batch_size = batch_size,
timesteps_to_save = timesteps_to_save,
inversion_prompt = inversion_prompt,)
frames = gr.State(value=frames)
latents = gr.State(value=latents)
inverted_latents = gr.State(value=inverted_latents)
do_inversion = False
return frames, latents, inverted_latents, do_inversion
########
# demo #
########
intro = """
TokenFlow
"""
with gr.Blocks(css="style.css") as demo:
gr.HTML(intro)
frames = gr.State()
inverted_latents = gr.State()
latents = gr.State()
do_inversion = gr.State(value=True)
with gr.Row():
input_vid = gr.Video(label="Input Video", interactive=True, elem_id="input_video")
output_vid = gr.Video(label="Edited Video", interactive=False, elem_id="output_video")
input_vid.style(height=365, width=365)
output_vid.style(height=365, width=365)
with gr.Row():
tar_prompt = gr.Textbox(
label="Describe your edited video",
max_lines=1, value=""
)
# with gr.Group(visible=False) as share_btn_container:
# with gr.Group(elem_id="share-btn-container"):
# community_icon = gr.HTML(community_icon_html, visible=True)
# loading_icon = gr.HTML(loading_icon_html, visible=False)
# share_button = gr.Button("Share to community", elem_id="share-btn", visible=True)
# with gr.Row():
# inversion_progress = gr.Textbox(visible=False, label="Inversion progress")
with gr.Row():
run_button = gr.Button("Edit your video!", visible=True)
with gr.Accordion("Advanced Options", open=False):
with gr.Tabs() as tabs:
with gr.TabItem('General options', id=2):
with gr.Row():
with gr.Column(min_width=100):
seed = gr.Number(value=0, precision=0, label="Seed", interactive=True)
randomize_seed = gr.Checkbox(label='Randomize seed', value=False)
steps = gr.Slider(label='Inversion steps', minimum=100, maximum=500,
value=500, step=1, interactive=True)
with gr.Column(min_width=100):
inversion_prompt = gr.Textbox(lines=1, label="Inversion prompt", interactive=True, placeholder="")
batch_size = gr.Slider(label='Batch size', minimum=1, maximum=10,
value=8, step=1, interactive=True)
n_frames = gr.Slider(label='Num frames', minimum=20, maximum=200,
value=40, step=1, interactive=True)
input_vid.change(
fn = reset_do_inversion,
outputs = [do_inversion],
queue = False)
input_vid.upload(
fn = reset_do_inversion,
outputs = [do_inversion],
queue = False).then(fn = preprocess_and_invert,
inputs = [input_vid,
frames,
latents,
inverted_latents,
seed,
randomize_seed,
do_inversion,
steps,
batch_size,
n_frames,
inversion_prompt
],
outputs = [frames,
latents,
inverted_latents,
do_inversion
])
demo.queue()
demo.launch()