" # # Launch the interface # interface = gr.Interface( # fn=describe_video, # inputs=[video, query], # outputs=response, # examples=examples, # title=title, # description=description, # article=article, # theme="Soft", # allow_flagging="never", # ) # interface.launch(debug=False) import warnings warnings.filterwarnings("ignore") import gradio as gr from src.video_model import describe_video # Assuming this function processes the video and query def process_video_and_questions(video, query, sitting, hands, location, screen): additional_info = [] if sitting: additional_info.append("Is the subject in the video standing or sitting?") if hands: additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free.?") if location: additional_info.append("Is the subject present indoors or outdoors?") if screen: additional_info.append("Is the subject interacting with a screen in the background by facing the screen?") final_query = query + " " + " ".join(additional_info) # Assuming your describe_video function handles the video processing response = describe_video(video, final_query) return response # Video and text inputs for the interface video = gr.Video(label="Video") query = gr.Textbox(label="Question", value="Here are some frames of a video. Describe this video in detail and answer the below questions.", placeholder="Enter your question here") # Options as checkboxes sitting = gr.Checkbox(label="Sitting/Standing") hands = gr.Checkbox(label="Hands Free/Not Free") location = gr.Checkbox(label="Indoors/Outdoors") screen = gr.Checkbox(label="Screen Interaction") # Output for the interface response = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True) # Examples for the interface examples = [ [ "videos/2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4", "Here are some frames of a video. Describe this video in detail." ], [ "videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4", "Here are some frames of a video. Describe this video in detail." ], [ "videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4", "Here are some frames of a video. Describe this video in detail." ] ] # Title, description, and article for the interface title = "GSoC Super Raid Annotator" description = "Annotate Videos" article = "

Model GitHub Repo | Model Page

" # Launch the interface interface = gr.Interface( fn=process_video_and_questions, # Updated function to handle the query construction inputs=[video, query, sitting, hands, location, screen], outputs=response, examples=examples, title=title, description=description, article=article, theme="Soft", allow_flagging="never", ) interface.launch(debug=False)