"""Gradio app for annotating a video with a fixed set of optional questions.

The user supplies a video and ticks the aspects to annotate.  The ticked
questions are appended to a base prompt, and the video together with the
assembled prompt is handed to ``describe_video``.
"""

import warnings

# Installed before the imports below so the filter is already in effect
# while they load.
warnings.filterwarnings("ignore")

import gradio as gr

from src.video_model import describe_video


def process_video_and_questions(video, sitting, hands, location, screen):
    """Build the prompt from the ticked options and run ``describe_video``.

    Args:
        video: Value of the video input; forwarded to ``describe_video``
            unchanged.
        sitting: If truthy, ask whether the subject is standing or sitting.
        hands: If truthy, ask whether the subject's hands are free.
        location: If truthy, ask whether the subject is indoors or outdoors.
        screen: If truthy, ask whether the subject is interacting with a
            screen in the background.

    Returns:
        Whatever ``describe_video`` returns for the video and the assembled
        prompt.
    """
    query = "Describe this video in detail and answer the questions"

    # (flag, question) pairs, kept in the order the questions are appended.
    optional_questions = (
        (sitting, "Is the subject in the video standing or sitting?"),
        (
            hands,
            "Is the subject holding any object in their hands, if so the "
            "hands are not free else they are free?",
        ),
        (location, "Is the subject present indoors or outdoors?"),
        (
            screen,
            "Is the subject interacting with a screen in the background by "
            "facing the screen?",
        ),
    )

    # Joining all parts in one pass avoids the trailing space that
    # `query + " " + ...` produced when no option was ticked.
    prompt_parts = [query]
    prompt_parts.extend(text for ticked, text in optional_questions if ticked)
    final_query = " ".join(prompt_parts)

    return describe_video(video, final_query)


# Video input for the interface
video = gr.Video(label="Video")

# Options as checkboxes; their order matches the parameters of
# process_video_and_questions.
sitting = gr.Checkbox(label="Sitting/Standing")
hands = gr.Checkbox(label="Hands Free/Not Free")
location = gr.Checkbox(label="Indoors/Outdoors")
screen = gr.Checkbox(label="Screen Interaction")

# Output for the interface
response = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)

# Examples for the interface.  Each row supplies a value for the video input
# only.  A second, identical "Morning_News_at_6AM" row was removed.
examples = [
    ["videos/2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4"],
    ["videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4"],
    ["videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4"],
    ["videos/2016-01-01_0200_US_KOCE_The_PBS_Newshour_577.03-581.31_tonight.mp4"],
    ["videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4"],
    ["videos/2016-01-02_0735_US_KCBS_Late_Show_with_Stephen_Colbert_285.94-290.67_this_year.mp4"],
    ["videos/2016-01-13_2200_US_KTTV-FOX_The_Doctor_Oz_Show_1709.79-1714.17_this_month.mp4"],
    ["videos/2016-01-01_1300_US_KNBC_Today_in_LA_at_5am_12.46-16.95_this_morning.mp4"],
    ["videos/2016-01-05_0200_US_KNBC_Channel_4_News_1561.29-1565.95_next_week.mp4"],
    ["videos/2016-01-28_0700_US_KNBC_Channel_4_News_at_11PM_629.56-633.99_in_the_future.mp4"],
]

# Title, description, and article for the interface
title = "GSoC Super Raid Annotator"
description = "Annotate Videos"
# Written with explicit "\n" escapes: a plain double-quoted literal cannot
# span several physical lines.
# NOTE(review): the text names a repo and a model page but contains no link
# targets - confirm whether link markup is missing here.
article = "\n\nModel GitHub Repo | Model Page\n\n"

# Soft theme with both the primary and the secondary hue set to red
custom_theme = gr.themes.Soft(primary_hue="red", secondary_hue="red")

# Build the interface; the callback assembles the query from the checkboxes
interface = gr.Interface(
    fn=process_video_and_questions,
    inputs=[video, sitting, hands, location, screen],
    outputs=response,
    examples=examples,
    title=title,
    description=description,
    article=article,
    theme=custom_theme,
    allow_flagging="never",
)

# Launch the interface
interface.launch(debug=False)