Spaces:

ManishThota
/

GSoC-Super-Rapid-Annotator

Runtime error

File size: 3,714 Bytes

0ec47c6
5fb8331
 
 
0ec47c6
 
5fb8331
0ec47c6
 
 
9871891
 
 
 
c78e00d
9871891
 
 
 
0ec47c6
9871891
5fb8331
0ec47c6
 
3fdad44
08ba28a
3fdad44
 
 
 
 
0ec47c6
92f266c
 
 
 
9871891
0ec47c6
 
5fb8331
60a8041
 
 
5fb8331
 
c837039
 
 
 
 
 
87cbf92
 
 
 
8a67c3f
5fb8331
 
 
 
e80df4f
5fb8331
 
ce45613
a9d7425
 
d0ede9f
0ec47c6
92f266c
 
0ec47c6
 
2b0ca40
92f266c
 
 
 
 
 
08ba28a
60a8041
 
 
 
 
 
 
 
9871891

# --- main.py (your Gradio app file) ---
import warnings
warnings.filterwarnings("ignore")
import gradio as gr
from src.video_model import describe_video  # Your video processing function
from src.text_processor import process_description

# --- Function to handle both video and text processing ---
def process_video_and_get_json(video, sitting, hands, location, screen):
    """Build the annotation prompt from the ticked options and describe the video.

    The prompt always starts with a fixed instruction, followed by a single
    space and then one question per truthy flag, in the fixed order
    sitting, hands, location, screen (questions are space-separated).

    Args:
        video: Passed through unchanged as the first argument of
            ``describe_video``.
        sitting: When truthy, ask whether the subject is standing or sitting.
        hands: When truthy, ask whether the subject's hands are free.
        location: When truthy, ask whether the subject is indoors or outdoors.
        screen: When truthy, ask whether the subject is facing a screen.

    Returns:
        Whatever ``describe_video(video, prompt)`` returns.
    """
    base_prompt = "Describe this video in detail and answer the questions."

    # Each flag is paired with the question it switches on; tuple order fixes
    # the order in which the questions appear in the prompt.
    optional_questions = (
        (sitting, "Is the subject in the video standing or sitting?"),
        (hands, "Is the subject holding any object in their hands, if so the hands are not free else they are free?"),
        (location, "Is the subject present indoors or outdoors?"),
        (screen, "Is the subject interacting with a screen in the background by facing the screen?"),
    )
    selected = [question for enabled, question in optional_questions if enabled]

    # Joining the base prompt with the (possibly empty) question string keeps
    # the single separating space even when nothing is selected.
    prompt = " ".join([base_prompt, " ".join(selected)])

    return describe_video(video, prompt)

def process_and_display_json(video_description):
    """Hand the video description to ``process_description`` and return its result.

    Args:
        video_description: Value forwarded unchanged to ``process_description``.

    Returns:
        Whatever ``process_description(video_description)`` returns.
    """
    return process_description(video_description)
    
# --- Gradio Interface ---
# Examples for the interface: each row pre-fills only the video input.
examples = [
    ["videos/2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4",],
    ["videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4",],
    ["videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4",],
    ["videos/2016-01-01_0200_US_KOCE_The_PBS_Newshour_577.03-581.31_tonight.mp4"],
    ["videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4"],
    ["videos/2016-01-02_0735_US_KCBS_Late_Show_with_Stephen_Colbert_285.94-290.67_this_year.mp4"],
    ["videos/2016-01-13_2200_US_KTTV-FOX_The_Doctor_Oz_Show_1709.79-1714.17_this_month.mp4"],
    ["videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4"],
    ["videos/2016-01-01_1300_US_KNBC_Today_in_LA_at_5am_12.46-16.95_this_morning.mp4"],
    ["videos/2016-01-05_0200_US_KNBC_Channel_4_News_1561.29-1565.95_next_week.mp4"],
    ["videos/2016-01-28_0700_US_KNBC_Channel_4_News_at_11PM_629.56-633.99_in_the_future.mp4"]
]

# Title, description, and article for the interface
title = "GSoC Super Rapid Annotator"
description = "Annotate Videos"
article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"

custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="blue",
    neutral_hue="zinc",
)

# The whole UI lives in one gr.Blocks context: Gradio only allows event
# listeners such as `.click()` to be attached inside a Blocks context, and a
# component is only displayed if it is created (or rendered) inside one.
with gr.Blocks(theme=custom_theme, title=title) as interface:
    gr.Markdown(f"<h1 style='text-align: center'>{title}</h1>")
    gr.Markdown(description)

    with gr.Row():
        # Left column: the video plus the optional questions to ask about it.
        with gr.Column():
            video = gr.Video(label="Video")
            sitting = gr.Checkbox(label="Sitting/Standing")
            hands = gr.Checkbox(label="Hands Free/Not Free")
            location = gr.Checkbox(label="Indoors/Outdoors")
            screen = gr.Checkbox(label="Screen Interaction")
            submit_button = gr.Button("Submit", variant="primary")

        # Right column: free-text description first, then its JSON form.
        with gr.Column():
            video_description = gr.Textbox(label="Video Description", show_label=True, show_copy_button=True)
            # Button to trigger JSON processing
            process_json_button = gr.Button("Process JSON")
            json_output = gr.JSON(label="JSON Output")

    # Caching is disabled explicitly so the model is never run over the
    # examples at start-up; clicking an example only fills in the video.
    gr.Examples(examples=examples, inputs=[video], cache_examples=False)

    gr.Markdown(article)

    # Step 1: describe the video using the selected questions.
    submit_button.click(
        fn=process_video_and_get_json,
        inputs=[video, sitting, hands, location, screen],
        outputs=video_description,
    )

    # Step 2: button click event to turn the description into JSON.
    process_json_button.click(
        fn=process_and_display_json,
        inputs=video_description,  # Take video description as input
        outputs=json_output,
    )

interface.launch(debug=False)