"""Gradio viewer for the MoVid video question-answering dataset.

On start-up the script downloads the MoVid video archives and the
``video-QA.json`` annotation file from the Hugging Face Hub, extracts the
archives into ``./videos``, and then launches a Gradio app that shows a
selected video together with its question and answer.
"""

import json
import os

import gradio as gr
import uvicorn
from fastapi import FastAPI
from huggingface_hub import hf_hub_download


def see_files(path):
    """Return the names of the entries in directory *path*."""
    return os.listdir(path)


def unzip_file(filename):
    """Extract ``./Movid/<filename>`` into ``./videos`` using the ``unzip`` tool.

    The exit status of ``unzip`` is not checked, so a failed extraction does
    not stop the script.
    """
    cmd = f"unzip -P lhchensllu ./Movid/{filename} -d ./videos"
    os.system(cmd)


def greet(name):
    """Return a greeting for *name*."""
    return "Hello " + name + "!!"


# Create the working directories; an already existing directory is fine.
os.makedirs("Movid", exist_ok=True)
os.makedirs("videos", exist_ok=True)

REPO_ID = "EvanTHU/MoVid"
filelist = ["animation", "dance", "haa500", "humman", "idea400", "kungfu", "music", "perform"]

# Download every subset archive and extract it into ./videos.
for file_ in filelist:
    filename = f"{file_}-video.zip"
    file = hf_hub_download(
        repo_id=REPO_ID,
        filename=filename,
        repo_type="dataset",
        local_dir="./Movid",
    )
    print(file)
    unzip_file(filename)

hf_hub_download(
    repo_id=REPO_ID,
    filename="video-QA.json",
    repo_type="dataset",
    local_dir="./",
)

print(see_files("./"))
print(see_files("./videos"))

# Load the JSON annotations.
with open('video-QA.json', 'r', encoding="utf-8") as f:
    data = json.load(f)

# Extract the fields shown in the UI; missing keys fall back to 'N/A'.
instructions = [item.get('instruction', 'N/A') for item in data]
input_videos = [item.get('input', 'N/A') for item in data]
output_captions = [item.get('output', 'N/A') for item in data]

# Map each video path to the position of its FIRST annotation, which is the
# entry ``list.index`` would find, without rescanning the list per selection.
_video_index = {}
for _position, _path in enumerate(input_videos):
    _video_index.setdefault(_path, _position)

title_markdown = ("""

MoVid Video QA Dataset Visualization

Dataset contact: Ling-Hao Chen (THU, IDEA), Shunlin Lu (CUHK-SZ, IDEA), Other contributors: Yuhong Zhang (THU, IDEA).

""")


def get_subdirs(directory):
    """Return the sorted first-level sub-directories of *directory*.

    Directories whose name contains ``"idea400-release"`` are left out.
    """
    return sorted(
        d
        for d in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, d)) and "idea400-release" not in d
    )


def get_next_level(directory):
    """Return ``(subdirs, videos)`` found directly inside *directory*.

    ``subdirs`` lists the sub-directory names and ``videos`` the entries whose
    name ends with ``.mp4``; both lists are sorted.
    """
    items = sorted(os.listdir(directory))
    subdirs = [d for d in items if os.path.isdir(os.path.join(directory, d))]
    videos = [f for f in items if f.endswith('.mp4')]
    return subdirs, videos


def display_video(subset1, subset2, video):
    """Return ``(instruction, video_path, caption)`` for the selected video.

    When any selection is empty (for example right after a parent dropdown
    was changed) the outputs are cleared. When the video has no annotation
    the video is still returned, with ``'N/A'`` for both text fields.
    """
    if not (subset1 and subset2 and video):
        return "", None, ""

    input_video = f"./videos/{subset1}/{subset2}/{video}"
    index = _video_index.get(input_video)
    if index is None:
        return "N/A", input_video, "N/A"
    return instructions[index], input_video, output_captions[index]


def create_demo():
    """Build and return the Gradio Blocks app for browsing the dataset."""
    css = """
    .large-font{
        font-size: 40px;
    }
    .gr-video{
        width: 70%;
        max-width: 640px;
        height: auto;
    }
    """
    root_dir = "./videos"

    def update_subset2(subset1):
        """Refresh the split dropdown and clear the video dropdown."""
        if subset1:
            subset2_dirs, _ = get_next_level(os.path.join(root_dir, subset1))
        else:
            subset2_dirs = []
        # value=None drops selections that belong to the previous subset.
        return (
            gr.Dropdown(choices=subset2_dirs, value=None, interactive=True),
            gr.Dropdown(choices=[], value=None, interactive=True),
        )

    def update_videos(subset1, subset2):
        """Refresh the video dropdown for the selected subset and split."""
        if subset1 and subset2:
            _, videos = get_next_level(os.path.join(root_dir, subset1, subset2))
        else:
            videos = []
        return gr.Dropdown(choices=videos, value=None, interactive=True)

    # The stylesheet must be handed to Blocks, otherwise the "large-font"
    # classes referenced below have no effect.
    with gr.Blocks(css=css) as demo:
        gr.Markdown(title_markdown)

        subset1_dirs = get_subdirs(root_dir)

        # NOTE(review): selectors are placed side by side and the outputs
        # below them; confirm this matches the intended layout.
        with gr.Row():
            subset1 = gr.Dropdown(choices=subset1_dirs, label="Select MoVid Subset")
            subset2 = gr.Dropdown(choices=[], label="Select Split")
            video = gr.Dropdown(choices=[], label="Select Video File")

        instruction_output = gr.Textbox(label="Question", elem_classes=["large-font"])
        caption_output = gr.Textbox(label="Answer", elem_classes=["large-font"])
        video_output = gr.Video(label="Input Video", elem_classes=["large-font"])

        subset1.change(update_subset2, inputs=subset1, outputs=[subset2, video])
        subset2.change(update_videos, inputs=[subset1, subset2], outputs=video)
        video.change(
            display_video,
            inputs=[subset1, subset2, video],
            outputs=[instruction_output, video_output, caption_output],
        )

    return demo


# Build and launch the Gradio demo.
demo = create_demo()
demo.launch()