import os

import gradio as gr
import pandas as pd

# Display name of the baseline method shown in the demo tab.
BASELINE = 'Tune-A-Video (Baseline)'

# Leaderboard column headers and the matching gradio Dataframe datatypes
# (kept in the same order; index i of TYPES describes COLS[i]).
COLS = ["Method", "CLIPScore (Frame Consistency) ⬆️", "CLIPScore (Text Alignment) ⬆️",
        "PickScore ⬆️", "Human Preference ⬆️", "References"]
TYPES = ["markdown", "number", "number", "number", "str", "markdown"]


def get_leaderboard():
    """Build the leaderboard table.

    Returns:
        pd.DataFrame with the COLS columns, sorted by 'PickScore ⬆️'
        descending. Entries without scores (e.g. the VideoCrafter
        placeholder) get NaN for the missing numeric columns.
    """
    baseline_0 = {
        "Method": '**Tune-A-Video**',
        "CLIPScore (Frame Consistency) ⬆️": 0.92,
        "CLIPScore (Text Alignment) ⬆️": 27.12,
        "PickScore ⬆️": 20.36,
        "Human Preference ⬆️": '',
        "References": ','.join(['Paper', 'Code', 'Website', 'Demo']),
    }
    baseline_1 = {
        "Method": 'VideoCrafter (todo)',
        "References": ','.join(['Code', 'Demo']),
    }
    dataframe = pd.DataFrame.from_records([baseline_0, baseline_1])
    dataframe = dataframe.sort_values(by=['PickScore ⬆️'], ascending=False)
    # Enforce the display column order expected by the gradio Dataframe.
    return dataframe[COLS]


leaderboard = get_leaderboard()


def refresh():
    """Recompute the leaderboard; hooked to the Refresh button and page load."""
    return get_leaderboard()


def load_edited_video(source_video, *args):
    """Map a source video to its pre-rendered edited counterpart.

    '<dir>/<name>.mp4' -> '<script_dir>/files/<name>-edit.mp4'.
    Extra positional args (the prompts passed by gr.Examples) are ignored.
    """
    result = source_video.split('/')[-1].split('.mp4')[0] + '-edit.mp4'
    return os.path.join(os.path.dirname(__file__), f"files/{result}")


block = gr.Blocks()

with block:
    with gr.Tab("Leaderboard"):
        with gr.Row():
            gr.Markdown("""# 🤗 LOVEU-TGVE @ CVPR 2023 Leaderboard

Welcome to the Text-Guided Video Editing (TGVE) competition leaderboard of LOVEU Workshop @ CVPR 2023!

Leveraging AI for video editing has the potential to unleash creativity for artists across all skill levels. The rapidly-advancing field of Text-Guided Video Editing (TGVE) is here to address this challenge. Recent works in this field include Tune-A-Video, Gen-2, and Dreamix. In this competition track, we provide a standard set of videos and prompts. As a researcher, you will develop a model that takes a video and a prompt for how to edit it, and your model will produce an edited video.
For instance, you might be given a video of “a man is surfing inside the barrel of a wave,” and your model will edit the video to “a man is surfing on a wave made of aurora borealis.”

During the competition, evaluation results performed against the following 3 automatic metrics will be displayed on the leaderboard:
- CLIPScore (Frame Consistency) - the average cosine similarity between all pairs of CLIP image embeddings computed on all frames of output videos.
- CLIPScore (Text Alignment) - the average CLIP score between all frames of output videos and corresponding edited prompts.
- PickScore - the average PickScore between all frames of output videos.

After all submissions are uploaded, we will run a human-evaluation of all submitted videos. Specifically, we will have human labelers compare all submitted videos. Labelers will evaluate videos on the following criteria:
- Text alignment: How well does the generated video match the caption?
- Structure: How well does the generated video preserve the structure of the original video?
- Quality: Aesthetically, how good is this video?

We will choose a winner and a runner-up based on the human evaluation results.

The **bold** method name indicates that the implementation is **official** (by the author / developer of the original method).""")
        with gr.Row():
            leaderboard_table = gr.components.Dataframe(value=leaderboard,
                                                        headers=COLS,
                                                        datatype=TYPES,
                                                        max_rows=10)
        with gr.Row():
            refresh_button = gr.Button("Refresh")
            refresh_button.click(refresh, inputs=[], outputs=[leaderboard_table])
        # Also repopulate the table whenever the page is (re)loaded.
        block.load(refresh, inputs=[], outputs=[leaderboard_table])

    with gr.Tab("Baseline Demo"):
        with gr.Row():
            gr.Markdown(f"""Some examples generated by {BASELINE} are shown below.""")
        with gr.Row():
            with gr.Column():
                source_video = gr.Video(type="file", label='Source Video',
                                        format="mp4", interactive=True)
                source_prompt = gr.Textbox(
                    label='Source Prompt',
                    # info='A good prompt describes each frame and most objects in video. Especially, it has the object or attribute that we want to edit or preserve.',
                    max_lines=2,
                    placeholder='Example: "A cat in the grass in the sun."',
                    # value='A cat in the grass in the sun.'
                )
            with gr.Column():
                result = gr.Video(type="file", label='Edited Video',
                                  format="mp4", interactive=True)
                editing_prompt = gr.Textbox(
                    label='Editing Prompt',
                    # info='A reasonable composition of video may achieve better results(e.g., "sunflower" video with "Van Gogh" prompt is better than "sunflower" with "Monet")',
                    max_lines=2,
                    placeholder='Example: "A dog in the grass in the sun."',
                    # value='A dog in the grass in the sun.'
                )
        with gr.Row():
            # Project-local example fixtures; imported lazily where they are used.
            from example import examples
            gr.Examples(examples=examples,
                        inputs=[source_video, source_prompt, editing_prompt],
                        outputs=result,
                        fn=load_edited_video,
                        cache_examples=True)

block.launch()