#!/usr/bin/env python
from __future__ import annotations
import os
import gradio as gr
from webui.merge_config_gradio import merge_config_then_run
import huggingface_hub
import shutil
import torch
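# Optional Hugging Face access token, read from the environment (may be None).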
HF_TOKEN = os.getenv('HF_TOKEN')
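# Build the editing pipeline once at startup; pipe.run serves every request below.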
pipe = merge_config_then_run()
ARTICLE = r"""
If VideoGrain is helpful, please help to ⭐ the <a href='https://github.com/knightyxp/VideoGrain' target='_blank'>Github Repo</a>. Thanks!
[![GitHub Stars](https://img.shields.io/github/stars/knightyxp/VideoGrain?style=social)](https://github.com/knightyxp/VideoGrain)
---
📝 **Citation**
If our work is useful for your research, please consider citing:
```bibtex
@article{yang2025videograin,
title={VideoGrain: Modulating Space-Time Attention for Multi-grained Video Editing},
author={Yang, Xiangpeng and Zhu, Linchao and Fan, Hehe and Yang, Yi},
journal={ICLR},
year={2025}
}
```
📋 **License**
This project is licensed under <a rel="license" href="https://github.com/knightyxp/VideoGrain?tab=License-1-ov-file#readme">ReLER-Lab License 1.0</a>.
Redistribution and use for non-commercial purposes should follow this license.
📧 **Contact**
If you have any questions, please feel free to reach out to me at <b>knightyxp@gmail.com</b>.
"""
def update_layout_visibility(selected_num):
    """Show the first `selected_num` layout-video inputs and hide the rest."""
    # `layout_files` is populated inside the gr.Blocks context below; this callback
    # only fires after the UI is built, so the forward reference is safe.
    num = int(selected_num)
    return [gr.update(visible=(i < num)) for i in range(len(layout_files))]
with gr.Blocks(css='style.css') as demo:
# gr.Markdown(TITLE)
gr.HTML(
"""
<div style="text-align: center; max-width: 1200px; margin: 20px auto;">
<h1 style="font-weight: 900; font-size: 2rem; margin: 0rem">
VideoGrain: Modulating Space-Time Attention for Multi-Grained Video Editing
</h1>
<h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
<a href="https://github.com/knightyxp">Xiangpeng Yang</a>
</h2>
<h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
<span class="link-block">
[<a href="https://arxiv.org/abs/2502.17258" target="_blank"
class="external-link ">
<span class="icon">
<i class="ai ai-arxiv"></i>
</span>
<span>arXiv</span>
</a>]
</span>
<!-- Github link -->
<span class="link-block">
[<a href="https://github.com/knightyxp/VideoGrain" target="_blank"
class="external-link ">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Code</span>
</a>]
</span>
<!-- Github link -->
<span class="link-block">
[<a href="https://knightyxp.github.io/VideoGrain_project_page" target="_blank"
class="external-link ">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Homepage</span>
</a>]
</span>
<!-- Github link -->
<span class="link-block">
[<a href="https://www.youtube.com/watch?v=XEM4Pex7F9E" target="_blank"
class="external-link ">
<span class="icon">
<i class="fab fa-youtube"></i>
</span>
<span>Youtube Video</span>
</a>]
</span>
</h2>
<h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
📕 TL;DR: VideoGrain is a zero-shot method for class-level, instance-level, and part-level video editing
</h2>
<h2 style="font-weight: 450; font-size: 1rem;">
        Note that this page is a limited demo of VideoGrain. To run with more configurations, please check out our <a href="https://github.com/knightyxp/VideoGrain">GitHub page</a>.
</h2>
</div>
""")
gr.HTML("""
    <p>We provide an <a href="https://github.com/knightyxp/VideoGrain?tab=readme-ov-file#editing-guidance-for-your-video">Editing Guidance</a> to help users choose hyperparameters when editing in-the-wild videos.</p>
    <p>To lift the demo limits or avoid the queue by running on your own hardware, you may <a href="https://huggingface.co/spaces/XiangpengYang/VideoGrain?duplicate=true" style="display: inline-block; vertical-align: middle;"><img style="margin-top: 0em; margin-bottom: 0em; display: inline-block;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></p>
""")
with gr.Row():
with gr.Column():
with gr.Accordion('Input Video', open=True):
# user_input_video = gr.File(label='Input Source Video')
user_input_video = gr.Video(label='Input Source Video', source='upload', type='numpy', format="mp4", visible=True).style(height="auto")
# Radio to choose how many layout videos to show
num_layouts = gr.Radio(
choices=["2", "3", "4", "5"],
label="Select Number of Editing Areas",
value="2", # default
info="Please select the number of editing areas"
)
                # Create the five layout-video components in a loop and collect them in layout_files
layout_files = []
with gr.Row():
for i in range(5):
video = gr.Video(
label=f"Layout Video {i+1}",
type="numpy",
format="mp4",
                        visible=(i < 2)  # show the first two by default
)
layout_files.append(video)
                # When num_layouts changes, update the visibility of each video component in layout_files
num_layouts.change(
fn=update_layout_visibility,
inputs=num_layouts,
outputs=layout_files
)
                prompt = gr.Textbox(label='Prompt',
                                    info='Edit the prompt and give one local prompt per editing area '
                                         '(the local prompt order must match the layout mask order).',
                                    )
model_id = gr.Dropdown(
label='Model ID',
choices=[
'stable-diffusion-v1-5/stable-diffusion-v1-5',
# add shape editing ckpt here
],
value='stable-diffusion-v1-5/stable-diffusion-v1-5')
with gr.Column():
result = gr.Video(label='Result')
# result.style(height=512, width=512)
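            # Temporal sampling: number of frames to edit, the starting frame, and the sampling stride.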
            with gr.Accordion('Temporal Crop Offset and Sampling Stride', open=False):
n_sample_frame = gr.Slider(label='Number of Frames',
minimum=0,
maximum=32,
step=1,
value=16)
                sampling_rate = gr.Slider(label='Sampling rate',
minimum=0,
maximum=20,
step=1,
value=1)
start_sample_frame = gr.Number(label='Start frame in the video',
value=0,
precision=0)
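            # ControlNet conditioning: pick the control signal; DW Pose exposes extra hand/face toggles.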
with gr.Row():
control_list = ['dwpose', 'depth_zoe', 'depth_midas']
control_type = gr.Dropdown(
choices=control_list,
label='Control type',
value='dwpose'
)
# Checkbox group for "dwpose" options; default: hand selected, face not selected.
dwpose_options = gr.CheckboxGroup(
choices=["hand", "face"],
label="DW Pose Options",
value=["hand"],
visible=True # Initially visible since default control_type is "dwpose"
)
# Update the visibility of the dwpose_options based on the selected control type
control_type.change(
fn=lambda x: gr.update(visible=(x == "dwpose")),
inputs=control_type,
outputs=dwpose_options
)
controlnet_conditioning_scale = gr.Slider(label='ControlNet conditioning scale',
minimum=0.0,
maximum=1.0,
value=1.0,
step=0.1)
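            # VideoGrain editing options: PnP feature injection and flatten resolution factors.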
            with gr.Accordion('Editing config for VideoGrain', open=True):
use_pnp = gr.Checkbox(
label="Use PnP",
value=False,
info="Check to enable PnP functionality."
)
                pnp_inject_steps = gr.Slider(label='PnP inject steps',
info='PnP inject steps for temporal consistency',
minimum=0,
maximum=10,
step=1,
value=0)
flatten_res = gr.CheckboxGroup(
choices=["1", "2", "4", "8"],
label="Flatten Resolution",
value=["1"],
info="Select one or more flatten resolution factors. Mapping: 1 -> 64, 2 -> 32 (64/2), 4 -> 16 (64/4), 8 -> 8 (64/8)."
)
run_button = gr.Button('Generate')
with gr.Row():
from example import style_example
examples = style_example
        # gr.Examples is currently disabled; if re-enabled, its inputs must
        # mirror the live `inputs` list defined below.
        # gr.Examples(examples=examples,
        #             inputs=[user_input_video,
        #                     num_layouts,
        #                     *layout_files,
        #                     prompt,
        #                     model_id,
        #                     n_sample_frame,
        #                     start_sample_frame,
        #                     sampling_rate,
        #                     control_type,
        #                     dwpose_options,
        #                     controlnet_conditioning_scale,
        #                     use_pnp,
        #                     pnp_inject_steps,
        #                     flatten_res],
        #             outputs=result,
        #             fn=pipe.run,
        #             cache_examples=os.getenv('SYSTEM') == 'spaces')
gr.Markdown(ARTICLE)
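    # The order of this list must match the positional argument order expected by pipe.run.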
inputs = [user_input_video, num_layouts,
*layout_files,
prompt,
model_id,
n_sample_frame,
start_sample_frame,
sampling_rate,
control_type,
dwpose_options,
controlnet_conditioning_scale,
use_pnp,
pnp_inject_steps,
flatten_res,
]
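    # Submitting the prompt or clicking Generate both trigger the same pipeline run.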
prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
run_button.click(fn=pipe.run, inputs=inputs, outputs=result)
demo.queue().launch(share=True)
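
# A minimal headless sketch (assumes pipe.run accepts the same positional values
# Gradio passes from `inputs`; all paths and prompts below are hypothetical):
#
# result_video = pipe.run(
#     'data/source.mp4', '2',                                # source video, number of editing areas
#     'data/mask1.mp4', 'data/mask2.mp4', None, None, None,  # up to five layout videos
#     'a red car and a blue car driving down the road',      # editing prompt with local prompts
#     'stable-diffusion-v1-5/stable-diffusion-v1-5',         # model id
#     16, 0, 1,                                              # n_sample_frame, start frame, sampling rate
#     'dwpose', ['hand'], 1.0,                               # control type, DW Pose options, conditioning scale
#     False, 0, ['1'],                                       # use_pnp, pnp_inject_steps, flatten_res
# )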