File size: 2,620 Bytes
2a1bd33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import cv2
import base64
import os
from openai import OpenAI
import gradio as gr

# Shared OpenAI client; the API key is taken from the OPENAI_API_KEY
# environment variable (None is passed through if it is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Video file extensions accepted for upload (lower-case, leading dot included).
ALLOWED_EXTENSIONS = [
    '.mp4',
    '.avi',
    '.mov',
    '.mkv',
    '.wmv',
    '.flv',
    '.webm',
]

# Sampling limits used by process_video.
_MAX_CLIP_SECONDS = 15    # only the first 15 seconds of the clip are analysed
_FRAME_SAMPLE_STEP = 25   # every 25th frame is sent to the model
_FALLBACK_FPS = 30.0      # used when the container reports no usable frame rate


def _sample_frames_as_base64(video, max_frames, step):
    """Read up to *max_frames* frames and return every *step*-th as base64 JPEG text."""
    encoded_frames = []
    for frame_index in range(max_frames):
        success, frame = video.read()
        if not success:
            break
        # Frames that are not sampled are still read (to advance the stream)
        # but never JPEG/base64 encoded, since they would be thrown away.
        if frame_index % step:
            continue
        encoded_ok, buffer = cv2.imencode(".jpg", frame)
        if encoded_ok:
            encoded_frames.append(base64.b64encode(buffer).decode("utf-8"))
    return encoded_frames


def process_video(video_path):
    """Classify the activity shown in a short video clip.

    The first 15 seconds of the video are read, every 25th frame is encoded
    as a base64 JPEG, and the sampled frames are sent to the "gpt-4o" chat
    model together with a CCTV sentiment-analysis prompt.

    Args:
        video_path: Path of the uploaded video file, or None/empty when
            nothing was uploaded.

    Returns:
        The text content of the model's first choice, or a human-readable
        error message when the input is missing, has a disallowed
        extension, cannot be opened, or yields no readable frames.

    Exceptions raised by the OpenAI client call are not caught here.
    """
    if not video_path:
        return "Please upload a video file."

    # Reject files whose extension is not in the allowed list.
    _, file_extension = os.path.splitext(video_path)
    if file_extension.lower() not in ALLOWED_EXTENSIONS:
        return f"Invalid file type. Allowed formats are: {', '.join(ALLOWED_EXTENSIONS)}"

    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            return "Invalid input. Please upload a valid video file."

        fps = video.get(cv2.CAP_PROP_FPS)
        # A reported rate of 0, NaN or infinity would make the frame budget
        # zero or un-computable, so fall back to a typical frame rate.
        if not 0 < fps < float("inf"):
            fps = _FALLBACK_FPS
        max_frames = max(1, int(_MAX_CLIP_SECONDS * fps))

        base64_frames = _sample_frames_as_base64(
            video, max_frames, _FRAME_SAMPLE_STEP
        )
    finally:
        # Always free the capture handle, including on early return or error.
        video.release()

    if not base64_frames:
        # Do not spend an API call on a prompt that carries no images.
        return "Could not read any frames from the video. Please upload a valid video file."

    # NOTE(review): the {"image": ..., "resize": ...} entry shape is kept
    # exactly as before; confirm it is still accepted by the current Chat
    # Completions vision API.
    prompt_messages = [
        {
            "role": "user",
            "content": [
                "These are frames from a video. Do sentiment analysis in accordance with all kind of cctv detection. Sentiment can be defined as normal, fight, criminal activity, shoplifting, burglary, animal fighting, animal cruelty, weapon detection, abandoned bags detection, crowd detection, fire detection, loitering detection. Return only the sentiment within a few words. Sentiment could be a combination of all the sentiments. If it doesn't match any of the sentiment then return what you think is the right sentiment.",
                *({"image": frame, "resize": 768} for frame in base64_frames),
            ],
        },
    ]

    params = {
        "model": "gpt-4o",
        "messages": prompt_messages,
        "max_tokens": 100,
    }

    result = client.chat.completions.create(**params)
    return result.choices[0].message.content

# Gradio front end: a single video upload is routed through process_video and
# the returned string is shown as plain text.
_video_input = gr.Video(label="Upload a video (max 15 seconds)")
_description = f"Upload a video (max 15 seconds) to analyze its sentiment. Allowed formats: {', '.join(ALLOWED_EXTENSIONS)}. Images are not accepted."

iface = gr.Interface(
    title="CCTV Video Analysis",
    description=_description,
    fn=process_video,
    inputs=_video_input,
    outputs="text",
)

# Start the app at import/run time; share=True requests a public share link
# (see the Gradio launch documentation).
iface.launch(share=True)