# Spaces: Sleeping
import cv2 | |
import base64 | |
import os | |
from openai import OpenAI | |
import gradio as gr | |
# Shared OpenAI client; the API key is taken from the OPENAI_API_KEY
# environment variable (None is passed if the variable is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Video file extensions (lower-case, with the leading dot) that
# process_video accepts; the order is also the order shown to the user.
ALLOWED_EXTENSIONS = [
    ".mp4",
    ".avi",
    ".mov",
    ".mkv",
    ".wmv",
    ".flv",
    ".webm",
]
# Sampling limits used by process_video.
_MAX_SECONDS = 15      # only the first 15 seconds of footage are analysed
_FRAME_STRIDE = 25     # every 25th decoded frame is sent to the model
_FALLBACK_FPS = 30.0   # assumed when the file reports no usable frame rate


def process_video(video_path):
    """Classify the activity shown in a short video clip.

    The first ``_MAX_SECONDS`` seconds of the video are decoded with OpenCV,
    every ``_FRAME_STRIDE``-th frame is JPEG-encoded and base64-encoded, and
    the sampled frames are sent together with a fixed prompt to the
    ``gpt-4o`` chat-completions endpoint through the module-level ``client``.

    Args:
        video_path: Path of the uploaded video file, or ``None``/empty when
            nothing was uploaded.

    Returns:
        The text content of the model's first choice, or a human-readable
        error message when the input is missing, has a disallowed extension,
        cannot be opened, or yields no decodable frames.

    Exceptions raised by the OpenAI client call are not caught here.
    """
    if not video_path:
        return "Please upload a video file."

    # Reject anything whose extension is not in the allow-list.
    _, file_extension = os.path.splitext(video_path)
    if file_extension.lower() not in ALLOWED_EXTENSIONS:
        return f"Invalid file type. Allowed formats are: {', '.join(ALLOWED_EXTENSIONS)}"

    video = cv2.VideoCapture(video_path)
    try:
        # The extension alone proves nothing; make sure OpenCV can open it.
        if not video.isOpened():
            return "Invalid input. Please upload a valid video file."

        fps = video.get(cv2.CAP_PROP_FPS)
        # A reported FPS of 0 would yield zero frames, and NaN/inf would make
        # int() raise; the chained comparison is False for all of those.
        if not 0 < fps < float("inf"):
            fps = _FALLBACK_FPS
        max_frames = int(_MAX_SECONDS * fps)

        sampled_frames = []
        for frame_index in range(max_frames):
            success, frame = video.read()
            if not success:
                break
            # Only frames that will actually be sent are encoded; encoding
            # every frame and slicing afterwards wastes CPU and memory.
            if frame_index % _FRAME_STRIDE:
                continue
            encoded, buffer = cv2.imencode(".jpg", frame)
            if encoded:
                sampled_frames.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Always free the capture handle, including on early return or error.
        video.release()

    if not sampled_frames:
        # Do not call the model with a prompt that contains no images.
        return "Could not read any frames from the video. Please upload a valid video file."

    prompt_messages = [
        {
            "role": "user",
            "content": [
                "These are frames from a video. Do sentiment analysis in accordance with all kind of cctv detection. Sentiment can be defined as normal, fight, criminal activity, shoplifting, burglary, animal fighting, animal cruelty, weapon detection, abandoned bags detection, crowd detection, fire detection, loitering detection. Return only the sentiment within a few words. Sentiment could be a combination of all the sentiments. If it doesn't match any of the sentiment then return what you think is the right sentiment.",
                *({"image": encoded_frame, "resize": 768} for encoded_frame in sampled_frames),
            ],
        },
    ]
    result = client.chat.completions.create(
        model="gpt-4o",
        messages=prompt_messages,
        max_tokens=100,
    )
    return result.choices[0].message.content
# Gradio UI: a single video upload wired to process_video, with the
# returned string shown as plain text.
iface = gr.Interface(
    title="CCTV Video Analysis",
    description=(
        "Upload a video (max 15 seconds) to analyze its sentiment. "
        f"Allowed formats: {', '.join(ALLOWED_EXTENSIONS)}. "
        "Images are not accepted."
    ),
    fn=process_video,
    inputs=gr.Video(label="Upload a video (max 15 seconds)"),
    outputs="text",
)

# Start the app; share=True asks Gradio for a public share link.
iface.launch(share=True)