CCTV_Analysis / vision.py
hxrdxk's picture
Upload folder using huggingface_hub
2a1bd33 verified
raw
history blame
2.62 kB
import cv2
import base64
import os
from openai import OpenAI
import gradio as gr
# Shared OpenAI client; the API key is read from the OPENAI_API_KEY
# environment variable (None is passed through if it is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Video file extensions (lower-case, with leading dot) accepted for upload.
ALLOWED_EXTENSIONS = [
    '.mp4',
    '.avi',
    '.mov',
    '.mkv',
    '.wmv',
    '.flv',
    '.webm',
]
def _sample_frames_as_base64(video_path, max_seconds=15, frame_stride=25,
                             fallback_fps=30.0):
    """Return base64-encoded JPEGs of every ``frame_stride``-th frame.

    Only the first ``max_seconds`` seconds of the clip are read. An empty
    list is returned when the file cannot be opened as a video or when no
    frame could be read/encoded.

    Args:
        video_path: Path of the video file to read.
        max_seconds: Length of the leading portion of the clip to scan.
        frame_stride: Keep one frame out of every ``frame_stride`` read.
        fallback_fps: Frame rate assumed when the container reports a
            non-positive or non-finite FPS.

    Returns:
        A list of base64 strings (UTF-8 decoded), one per sampled frame.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            return []

        fps = video.get(cv2.CAP_PROP_FPS)
        # Some containers report 0 (or NaN) for FPS; without a fallback the
        # frame budget would be 0 and nothing would be sampled.
        if not 0 < fps < float("inf"):
            fps = fallback_fps
        max_frames = int(max_seconds * fps)

        encoded_frames = []
        for frame_index in range(max_frames):
            success, frame = video.read()
            if not success:
                break
            # Encode only the frames that are actually sent to the model
            # instead of encoding all of them and slicing afterwards.
            if frame_index % frame_stride != 0:
                continue
            encoded_ok, buffer = cv2.imencode(".jpg", frame)
            if not encoded_ok:
                continue
            encoded_frames.append(base64.b64encode(buffer).decode("utf-8"))
        return encoded_frames
    finally:
        # Always free the capture handle, including on early return or error.
        video.release()


def process_video(video_path):
    """Classify the activity shown in an uploaded video using GPT-4o.

    The file extension is validated against ``ALLOWED_EXTENSIONS``, up to the
    first 15 seconds of the clip are read, every 25th frame is JPEG-encoded,
    and the sampled frames are sent together with a fixed prompt to the
    chat-completions endpoint.

    Args:
        video_path: Path of the uploaded video file, or ``None`` when nothing
            was uploaded.

    Returns:
        The model's textual answer, or a user-facing error message when the
        input is missing, has a disallowed extension, or yields no frames.

    Raises:
        Whatever ``client.chat.completions.create`` raises; API errors are
        not caught here.
    """
    if video_path is None:
        return "Please upload a video file."

    # Check if the file extension is in the allowed list
    _, file_extension = os.path.splitext(video_path)
    if file_extension.lower() not in ALLOWED_EXTENSIONS:
        return f"Invalid file type. Allowed formats are: {', '.join(ALLOWED_EXTENSIONS)}"

    base64_frames = _sample_frames_as_base64(video_path)
    if not base64_frames:
        # Unopenable file or no decodable frame: do not spend an API call
        # on a request that carries no images.
        return "Invalid input. Please upload a valid video file."

    # NOTE(review): the {"image": ..., "resize": ...} content parts are kept
    # exactly as the original sent them; confirm this shape is still accepted
    # by the API versus the documented {"type": "image_url", ...} parts.
    prompt_messages = [
        {
            "role": "user",
            "content": [
                "These are frames from a video. Do sentiment analysis in accordance with all kind of cctv detection. Sentiment can be defined as normal, fight, criminal activity, shoplifting, burglary, animal fighting, animal cruelty, weapon detection, abandoned bags detection, crowd detection, fire detection, loitering detection. Return only the sentiment within a few words. Sentiment could be a combination of all the sentiments. If it doesn't match any of the sentiment then return what you think is the right sentiment.",
                *({"image": frame, "resize": 768} for frame in base64_frames),
            ],
        },
    ]
    params = {
        "model": "gpt-4o",
        "messages": prompt_messages,
        "max_tokens": 100,
    }

    result = client.chat.completions.create(**params)
    return result.choices[0].message.content
# Gradio front end: a single video upload wired to process_video, with the
# model's answer shown as plain text.
iface = gr.Interface(
    title="CCTV Video Analysis",
    description=f"Upload a video (max 15 seconds) to analyze its sentiment. Allowed formats: {', '.join(ALLOWED_EXTENSIONS)}. Images are not accepted.",
    fn=process_video,
    inputs=gr.Video(label="Upload a video (max 15 seconds)"),
    outputs="text",
)

# Start the app as soon as the module is executed, requesting a public
# share link.
iface.launch(share=True)