Spaces:
Runtime error
Runtime error
File size: 5,472 Bytes
b3ca871 50ab392 05789a2 b3ca871 f9f1c5f 1e38ac4 05789a2 f9f1c5f b3ca871 1e38ac4 f9f1c5f 1e38ac4 f9f1c5f 1e38ac4 f9f1c5f 1e38ac4 3035f99 f9f1c5f 89b6fe6 b3ca871 78e2d46 05789a2 c935ba3 1e38ac4 9399086 b012810 9399086 f9f1c5f 1e38ac4 f9f1c5f 50ab392 1e38ac4 b012810 1e38ac4 b012810 b3ca871 50ab392 b3ca871 b012810 b3ca871 50ab392 1e38ac4 50ab392 b012810 50ab392 1e38ac4 50ab392 1e38ac4 50ab392 b012810 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import warnings
warnings.filterwarnings("ignore")
import gradio as gr
import pandas as pd
from src.video_model import describe_video
from src.utils import parse_string, parse_annotations
import os
# --- Function to construct the final query ---
def process_video_and_questions(video, standing, hands, location, screen):
    """Ask the video model the selected questions and return its tagged reply.

    A prompt is assembled from the questions whose checkbox is ticked, sent
    together with the video to ``describe_video``, and the reply is prefixed
    with a ``<video_name>`` tag holding the file's basename.

    Annotations for questions that were NOT selected are forced to ``None``:
    the model may still emit them because the prompt's examples list all four
    labels, but any ``0``/``1`` it reports for an unasked question carries no
    information.

    Args:
        video: Path to the video file; its basename becomes the video name.
        standing: Ask whether the subject is standing or sitting.
        hands: Ask whether the subject is holding an object.
        location: Ask whether the subject is indoors.
        screen: Ask whether the subject is interacting with a screen.

    Returns:
        The ``<video_name>`` tag followed by the (post-processed) model reply.

    Raises:
        gr.Error: If no video was provided.
    """
    if not video:
        # os.path.basename(None) would otherwise fail with an opaque TypeError.
        raise gr.Error("Please provide a video before generating annotations.")

    video_name = os.path.basename(video)

    # (selected?, question text, annotation label used in the model's reply)
    question_bank = (
        (standing, "Is the subject in the video standing or sitting?\n", "standing"),
        (hands, "Is the subject holding any object in their hands?\n", "hands.free"),
        (location, "Is the subject present indoors?\n", "indoors"),
        (
            screen,
            "Is the subject interacting with a screen in the background by facing the screen?\n",
            "screen.interaction_yes",
        ),
    )

    query = "Answer the questions from the video\n"
    selected_questions = [text for asked, text, _ in question_bank if asked]
    # Spelled out with explicit newlines so the prompt does not depend on how
    # this function happens to be indented.
    end_query = (
        "Provide the results in <annotation> tags, where 0 indicates False, "
        "1 indicates True, and None indicates that no information is present. "
        "Follow the below examples\n:\n"
        "<annotation>indoors: 0</annotation>\n"
        "<annotation>standing: 1</annotation>\n"
        "<annotation>hands.free: 0</annotation>\n"
        "<annotation>screen.interaction_yes: 0</annotation>\n"
    )
    final_prompt = query + " " + " ".join(selected_questions) + " " + end_query

    response = describe_video(video, final_prompt)
    final_response = f"<video_name>{video_name}</video_name>" + " \n" + response

    # Blank out answers to questions that were never asked. Both possible
    # boolean answers are covered; previously only "1" was rewritten, so a
    # "0" for an unasked question leaked through as a real annotation.
    for asked, _, label in question_bank:
        if asked:
            continue
        for value in ("0", "1"):
            final_response = final_response.replace(
                f"{label}: {value}", f"{label}: None"
            )

    return final_response
def output_to_csv(final_response):
    """Write the annotations contained in a tagged response to a CSV file.

    The response is split into its ``video_name`` and ``annotation`` tags via
    ``parse_string``; the annotations are converted to a dict with
    ``parse_annotations`` and stored, together with the video name, as a
    single-row CSV named ``<video_name>_annotations.csv``.

    Args:
        final_response: Text containing ``<video_name>`` / ``<annotation>`` tags.

    Returns:
        Path of the CSV file that was written (used for the download widget).
    """
    tags = parse_string(final_response, ["video_name", "annotation"])

    # First <video_name> match, or None when the tag is absent.
    if tags['video_name']:
        video_name = tags['video_name'][0]
    else:
        video_name = None

    # Annotation columns; empty when the response carries no annotations.
    if tags['annotation']:
        annotations_dict = parse_annotations(tags['annotation'])
    else:
        annotations_dict = {}

    # video_name goes first so it is the leading column of the CSV.
    row = {'video_name': video_name}
    row.update(annotations_dict)
    frame = pd.DataFrame([row])

    csv_file_path = f"{video_name}_annotations.csv"
    frame.to_csv(csv_file_path, index=False)
    return csv_file_path
# Example rows offered in the interface.
# Each row is [video path, standing, hands, location, screen], matching the
# order of the inputs the examples are bound to.
examples = [
    list(example_row)
    for example_row in (
        ("videos/2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4", True, False, True, False),
        ("videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4", False, True, True, True),
        ("videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4", True, False, False, True),
        ("videos/2016-01-01_0200_US_KOCE_The_PBS_Newshour_577.03-581.31_tonight.mp4", False, True, True, False),
        ("videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4", True, True, False, False),
        ("videos/2016-01-02_0735_US_KCBS_Late_Show_with_Stephen_Colbert_285.94-290.67_this_year.mp4", False, True, True, True),
        ("videos/2016-01-13_2200_US_KTTV-FOX_The_Doctor_Oz_Show_1709.79-1714.17_this_month.mp4", True, False, False, True),
        ("videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4", False, True, True, False),
        ("videos/2016-01-01_1300_US_KNBC_Today_in_LA_at_5am_12.46-16.95_this_morning.mp4", True, False, False, True),
        ("videos/2016-01-05_0200_US_KNBC_Channel_4_News_1561.29-1565.95_next_week.mp4", False, True, True, False),
        ("videos/2016-01-28_0700_US_KNBC_Channel_4_News_at_11PM_629.56-633.99_in_the_future.mp4", True, False, False, True),
    )
]
# Static text rendered at the top of the page (heading, tagline, model links).
title = "GSoC Super Raid Annotator"
description = "Annotate Videos"
article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"
# Soft theme using red for both the primary and secondary hue.
custom_theme = gr.themes.Soft(primary_hue="red", secondary_hue="red")
# Page layout: header markdown, then one row with an input column and an
# output column. Components appear in the order they are constructed here.
with gr.Blocks(theme=custom_theme) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)
    gr.Markdown(article)
    with gr.Row():
        # First column: the video, one checkbox per question, and the buttons.
        with gr.Column():
            video = gr.Video(label="Video")
            standing = gr.Checkbox(label="Standing")
            hands = gr.Checkbox(label="Hands Free")
            location = gr.Checkbox(label="Indoors")
            screen = gr.Checkbox(label="Screen Interaction")
            submit_btn = gr.Button("Generate Annotations")
            generate_csv_btn = gr.Button("Generate CSV")
        # Second column: the model's tagged reply and the CSV download.
        with gr.Column():
            response = gr.Textbox(label="Video Description", show_label=True, show_copy_button=True)
            csv_output = gr.File(label="Download CSV", interactive=False)
    # "Generate Annotations" sends the video and checkbox states to the model
    # and shows the tagged reply in the textbox.
    submit_btn.click(
        fn=process_video_and_questions,
        inputs=[video, standing, hands, location, screen],
        outputs=response
    )
    # "Generate CSV" converts whatever text is currently in the textbox, so
    # annotations must have been generated first.
    generate_csv_btn.click(
        fn=output_to_csv,
        inputs=response,
        outputs=csv_output
    )
    # Example rows fill the same five inputs, in the same order.
    gr.Examples(examples=examples, inputs=[video, standing, hands, location, screen])
# Launched at module level (no __main__ guard), so this runs whenever the
# module is executed or imported.
demo.launch(debug=False)
|