"""Gradio app for paging through video question/label/prediction examples.

Each dataset is a JSON file containing a list of example dicts.  One page
shows ``batch_size`` examples side by side; moving the slider or switching
the dataset refreshes the same fixed set of components in place.
"""

import json
import math
import os
import random

import gradio as gr

# Paths to the JSON files
json_files = {
    "Kinetics700": "kinetics700_tune_.json",
    "STAR-benchmark": "starb_tune_.json",
    "FineDiving": "finediving_tune_.json",
}

VIDEO_NAME = 'video_name'
QUESTION = 'question'
LABEL = 'label'
PREDICTION = 'prediction'

left_side_columns = [VIDEO_NAME]
right_side_columns = [QUESTION, LABEL, PREDICTION]
batch_size = 8
target_size = (1024, 1024)

# Assuming 500 samples per dataset
samples_per_dataset = 500
default_dataset = "Kinetics700"


def _pretty_label(key):
    """Turn a column key such as ``video_name`` into a display label."""
    return key.capitalize().replace("_", " ")


def _load_page(index, dataset):
    """Return the examples on page *index* (0-based) of *dataset*.

    The returned list holds fewer than ``batch_size`` examples (possibly
    none) when the page reaches past the end of the dataset.
    """
    # int() guards against the slider delivering a float, which cannot be
    # used as a slice bound.
    start_index = int(index) * batch_size
    with open(json_files[dataset], 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data[start_index:start_index + batch_size]


def _video_state(path):
    """Return the ``(value, label)`` pair a video player should show for *path*.

    A missing file yields no value, and the problem is reported through the
    label so the player is never handed a path it cannot serve.
    """
    if not path:
        return None, "No video"
    # isfile rather than exists: a directory is not a playable video.
    if os.path.isfile(path):
        return path, "Video"
    return None, f"Video file not found: {path}"


def _blank_values():
    """Return placeholder values for a column that has no example."""
    return [
        None if key == VIDEO_NAME else ""
        for key in left_side_columns + right_side_columns
    ]


def get_instance_values(example, dataset_name):
    """Return the display values of *example*, left columns first.

    The video name is resolved to an absolute path inside the
    *dataset_name* directory.  *example* itself is left unmodified.

    Raises:
        KeyError: if *example* lacks one of the displayed columns.
    """
    values = []
    for key in left_side_columns + right_side_columns:
        value = example[key]
        if key == VIDEO_NAME:
            value = os.path.abspath(os.path.join(dataset_name, value))
        values.append(value)
    return values


def func(index, dataset):
    """Gradio callback: return new contents for every component on the page.

    Always returns exactly ``batch_size`` columns' worth of outputs, so the
    result lines up with the registered output components even when the
    page is short or empty.

    Args:
        index: 0-based page number taken from the slider.
        dataset: key into ``json_files``.
    """
    examples = _load_page(index, dataset)
    rows = [get_instance_values(example, dataset) for example in examples]
    # Pad short pages so stale content from the previous page is cleared.
    rows += [_blank_values() for _ in range(batch_size - len(examples))]

    columns = left_side_columns + right_side_columns
    outputs = []
    for values in rows:
        for key, value in zip(columns, values):
            if key == VIDEO_NAME:
                path, label = _video_state(value)
                outputs.append(gr.update(value=path, label=label))
            else:
                outputs.append(value)
    return outputs


demo = gr.Blocks()


def get_col(example, dataset_name):
    """Build one column of components for *example* inside the current layout.

    Must be called within an active Blocks context.  Pass ``None`` as
    *example* to build an empty placeholder column.

    Returns:
        ``(inputs_left, text_inputs_right)``: the components created for
        ``left_side_columns`` and ``right_side_columns`` respectively.
    """
    if example is None:
        instance_values = _blank_values()
    else:
        instance_values = get_instance_values(example, dataset_name)
    n_left = len(left_side_columns)

    with gr.Column():
        inputs_left = []
        for key, value in zip(left_side_columns, instance_values[:n_left]):
            if key == VIDEO_NAME:
                # Always a Video component: its type cannot change later,
                # and later pages may or may not have the file on disk.
                path, label = _video_state(value)
                input_k = gr.Video(value=path, label=label)
            else:
                input_k = gr.Textbox(value=value, label=_pretty_label(key))
            inputs_left.append(input_k)

        with gr.Accordion("Click for details", open=False):
            text_inputs_right = []
            for key, value in zip(right_side_columns, instance_values[n_left:]):
                label = _pretty_label(key)
                if key == PREDICTION:
                    text_input_k = gr.Textbox(value=value, label=label, lines=7)
                else:
                    text_input_k = gr.Textbox(value=value, label=label)
                text_inputs_right.append(text_input_k)

    return inputs_left, text_inputs_right


with demo:
    gr.Markdown("# Slide to iterate videos")

    with gr.Column():
        dataset_dropdown = gr.Dropdown(
            choices=list(json_files.keys()),
            label="Select Dataset",
            value=default_dataset,
        )
        # Pages are 0-indexed, so the last page is ceil(n / batch_size) - 1.
        last_page = max(0, math.ceil(samples_per_dataset / batch_size) - 1)
        slider = gr.Slider(minimum=0, maximum=last_page, step=1, value=0, label='Page')

        with gr.Row():
            first_page = _load_page(0, default_dataset)
            # Always build batch_size columns so every later page has a
            # component for each of its examples.
            first_page += [None] * (batch_size - len(first_page))

            all_inputs_left_right = []
            for example in first_page:
                inputs_left, text_inputs_right = get_col(example, default_dataset)
                all_inputs_left_right += inputs_left + text_inputs_right

    slider.change(func, inputs=[slider, dataset_dropdown], outputs=all_inputs_left_right)
    dataset_dropdown.change(func, inputs=[slider, dataset_dropdown], outputs=all_inputs_left_right)

demo.launch()