import os
import gradio as gr
import pandas as pd
BASELINE = '<a target="_blank" href="https://github.com/showlab/loveu-tgve-2023" style="color: blue; text-decoration: underline; text-decoration-style: dotted;">Tune-A-Video (Baseline)</a>'
COLS = ["Method", "CLIPScore (Frame Consistency) ⬆️", "CLIPScore (Text Alignment) ⬆️", "PickScore ⬆️", "Human Preference ⬆️", "References"]
TYPES = ["markdown", "number", "number", "number", "str", "markdown"]
def get_leaderboard():
    """Assemble the leaderboard rows into a DataFrame sorted by PickScore."""
    all_data = []
    baseline_0 = {
        "Method": '**Tune-A-Video**',
        "CLIPScore (Frame Consistency) ⬆️": 0.92,
        "CLIPScore (Text Alignment) ⬆️": 27.12,
        "PickScore ⬆️": 20.36,
        "Human Preference ⬆️": '',
        "References": ','.join(['<a target="_blank" href="https://arxiv.org/abs/2212.11565" style="color: blue">Paper</a>',
                                '<a target="_blank" href="https://github.com/showlab/Tune-A-Video" style="color: blue">Code</a>',
                                '<a target="_blank" href="https://tuneavideo.github.io/" style="color: blue">Website</a>',
                                '<a target="_blank" href="https://huggingface.co/spaces/Tune-A-Video-library/Tune-A-Video-inference" style="color: blue">Demo</a>'])
    }
    # Placeholder row; the metric columns are left unset (NaN) until results exist.
    baseline_1 = {
        "Method": 'VideoCrafter (todo)',
        "References": ','.join(['<a target="_blank" href="https://github.com/VideoCrafter/VideoCrafter" style="color: blue">Code</a>',
                                '<a target="_blank" href="https://huggingface.co/spaces/VideoCrafter/VideoCrafter" style="color: blue">Demo</a>'])
    }
    all_data += [baseline_0, baseline_1]
    dataframe = pd.DataFrame.from_records(all_data)
    dataframe = dataframe.sort_values(by=['PickScore ⬆️'], ascending=False)
    print(dataframe)
    dataframe = dataframe[COLS]
    return dataframe
leaderboard = get_leaderboard()
def refresh():
    return get_leaderboard()

def load_edited_video(source_video, *args):
    # Map a source clip to its pre-rendered edit, e.g. "cat.mp4" -> "files/cat-edit.mp4".
    result = source_video.split('/')[-1].split('.mp4')[0] + '-edit.mp4'
    return os.path.join(os.path.dirname(__file__), f"files/{result}")
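
# ---------------------------------------------------------------------------
# Illustrative metric sketches (not part of the original app; never called).
# The Leaderboard tab below describes three automatic metrics; the helpers
# here are a minimal sketch of how the two CLIP-based ones might be computed
# with Hugging Face `transformers`. The CLIP variant and preprocessing used
# by the competition are not specified in this file, so the checkpoint name
# "openai/clip-vit-base-patch32" is only a placeholder assumption. PickScore
# would follow the same image-text pattern with its own CLIP-style
# checkpoint; it is omitted here.

def clip_frame_consistency(frames):
    """Average cosine similarity between CLIP embeddings of all frame pairs.

    `frames` is assumed to be a list of PIL images decoded from an output
    video (at least two frames).
    """
    import torch  # imported lazily so the app itself does not require torch
    from transformers import CLIPModel, CLIPProcessor

    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    inputs = processor(images=frames, return_tensors="pt")
    with torch.no_grad():
        emb = model.get_image_features(**inputs)
    emb = emb / emb.norm(dim=-1, keepdim=True)  # unit-normalise embeddings
    sim = emb @ emb.T                           # pairwise cosine similarities
    n = sim.shape[0]
    # Mean over distinct pairs: subtract the diagonal (n self-similarities of 1.0).
    return ((sim.sum() - n) / (n * (n - 1))).item()


def clip_text_alignment(frames, edited_prompt):
    """Average CLIP similarity between each output frame and the edited
    prompt, reported on the conventional 100x CLIPScore scale."""
    import torch
    from transformers import CLIPModel, CLIPProcessor

    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    inputs = processor(text=[edited_prompt], images=frames,
                       return_tensors="pt", padding=True)
    with torch.no_grad():
        img = model.get_image_features(pixel_values=inputs["pixel_values"])
        txt = model.get_text_features(input_ids=inputs["input_ids"],
                                      attention_mask=inputs["attention_mask"])
    img = img / img.norm(dim=-1, keepdim=True)
    txt = txt / txt.norm(dim=-1, keepdim=True)
    return (100 * (img @ txt.T)).mean().item()
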
block = gr.Blocks()
with block:
    with gr.Tab("Leaderboard"):
        with gr.Row():
            gr.Markdown("""
            # 🤗 LOVEU-TGVE @ CVPR 2023 Leaderboard

            <font size="4">
            <b>Welcome to the <a href="https://sites.google.com/view/loveucvpr23/track4" target="_blank">Text-Guided Video Editing (TGVE)</a> competition leaderboard of the <a href="https://sites.google.com/view/loveucvpr23/home" target="_blank">LOVEU Workshop @ CVPR 2023</a>!</b>

            Leveraging AI for video editing has the potential to unleash creativity for artists of all skill levels, and the rapidly advancing field of Text-Guided Video Editing (TGVE) aims to deliver on that promise. Recent works in this field include <a href="https://tuneavideo.github.io/" target="_blank">Tune-A-Video</a>, <a href="https://research.runwayml.com/gen2" target="_blank">Gen-2</a>, and <a href="https://dreamix-video-editing.github.io/" target="_blank">Dreamix</a>.

            In this competition track, we provide a standard set of videos and prompts. As a researcher, you will develop a model that takes a video and a prompt describing how to edit it, and produces an edited video. For instance, you might be given a video of “a man is surfing inside the barrel of a wave,” and your model will edit the video to “a man is surfing on a wave made of aurora borealis.”

            During the competition, the leaderboard will display evaluation results for the following three automatic metrics:
            - <a href="https://arxiv.org/abs/2103.00020" target="_blank">CLIPScore</a> (Frame Consistency) - the average cosine similarity between all pairs of CLIP image embeddings computed on all frames of an output video.
            - <a href="https://arxiv.org/abs/2103.00020" target="_blank">CLIPScore</a> (Text Alignment) - the average CLIP score between all frames of an output video and the corresponding edited prompt.
            - <a href="https://arxiv.org/abs/2305.01569" target="_blank">PickScore</a> - the average PickScore across all frames of an output video.

            After all submissions are uploaded, we will run a human evaluation in which labelers compare the submitted videos on the following criteria:
            - Text alignment: How well does the generated video match the caption?
            - Structure: How well does the generated video preserve the structure of the original video?
            - Quality: Aesthetically, how good is this video?

            We will choose a winner and a runner-up based on the human-evaluation results.
            </font>

            A **bold** method name indicates that the implementation is **official** (by the authors/developers of the original method).""")
        with gr.Row():
            leaderboard_table = gr.components.Dataframe(value=leaderboard, headers=COLS,
                                                        datatype=TYPES, max_rows=10)
        with gr.Row():
            refresh_button = gr.Button("Refresh")
            refresh_button.click(refresh, inputs=[], outputs=[leaderboard_table])
    block.load(refresh, inputs=[], outputs=[leaderboard_table])
    with gr.Tab("Baseline Demo"):
        with gr.Row():
            gr.Markdown(f"""Some examples generated by {BASELINE} are shown below.""")
        with gr.Row():
            with gr.Column():
                source_video = gr.Video(type="file", label='Source Video', format="mp4", interactive=True)
                source_prompt = gr.Textbox(label='Source Prompt',
                                           # info='A good prompt describes each frame and most objects in the video. In particular, it mentions the object or attribute that we want to edit or preserve.',
                                           max_lines=2,
                                           placeholder='Example: "A cat in the grass in the sun."',
                                           # value='A cat in the grass in the sun.'
                                           )
            with gr.Column():
                result = gr.Video(type="file", label='Edited Video', format="mp4", interactive=True)
                editing_prompt = gr.Textbox(label='Editing Prompt',
                                            # info='A reasonable composition of video and prompt may achieve better results (e.g., a "sunflower" video with a "Van Gogh" prompt works better than with a "Monet" prompt).',
                                            max_lines=2,
                                            placeholder='Example: "A dog in the grass in the sun."',
                                            # value='A dog in the grass in the sun.'
                                            )
        with gr.Row():
            from example import examples
            gr.Examples(examples=examples,
                        inputs=[source_video, source_prompt, editing_prompt],
                        outputs=result,
                        fn=load_edited_video,
                        cache_examples=True,
                        )
block.launch()