# Importing the requirements
import warnings

warnings.filterwarnings("ignore")

import gradio as gr

from src.video_model import describe_video

# Video and text inputs for the interface
video = gr.Video(label="Video")
query = gr.Textbox(label="Question", placeholder="Enter your question here")

# Output for the interface
response = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)

# Examples for the interface
examples = [
    [
        "videos/2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4",
        "Here are some frames of a video. Describe this video in detail.",
    ],
    # [
    #     "videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4",
    #     "Here are some frames of a video. Describe this video in detail.",
    # ],
    # [
    #     "videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4",
    #     "Here are some frames of a video. Describe this video in detail.",
    # ],
]

# Title, description, and article for the interface
title = "GSoC Super Raid Annotator"
description = (
    "Gradio Demo for the MiniCPM-V 2.6 Vision Language Understanding and Generation model. "
    "This model can answer questions about videos in natural language. To use it, simply "
    "upload your video, type a question, and click 'submit', or click one of the examples "
    "to load them. Read more at the links below."
)
article = "Model GitHub Repo | Model Page"

# Launch the interface
interface = gr.Interface(
    fn=describe_video,
    inputs=[video, query],
    outputs=response,
    examples=examples,
    title=title,
    description=description,
    article=article,
    theme="Soft",
    allow_flagging="never",
)
interface.launch(debug=False)