import cv2
import gradio as gr
from PIL import Image
from transformers import pipeline

# Load local models.
# Note: a stock transformers object-detection pipeline returns labels, scores,
# and bounding boxes only; the 'keypoints' field read below assumes the local
# checkpoint's pipeline has been extended to emit pose keypoints (see the
# ultralytics-based sketch at the bottom of this file for an alternative).
pose_detection = pipeline("object-detection", model="./yolov8-pose")  # local YOLOv8 pose model
suspicious_activity_detection = pipeline(
    "text-classification", model="./suspicious_activity_model"
)  # local suspicious-activity classifier


def process_frame(frame):
    """Detect persons in a BGR frame and flag suspicious behavior."""
    # transformers image pipelines expect a PIL image (or a path/URL),
    # so convert the BGR numpy frame to an RGB PIL image first.
    results = pose_detection(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    for person in results:
        if person['label'] == 'person' and 'box' in person:
            box = person['box']
            x1, y1 = int(box['xmin']), int(box['ymin'])
            x2, y2 = int(box['xmax']), int(box['ymax'])
            if 'keypoints' in person:
                # Flatten the (x, y) keypoint pairs into one space-separated
                # string, the input format the text classifier expects.
                keypoints = person['keypoints']
                keypoints_input = " ".join(
                    str(kp) for point in keypoints for kp in point[:2]
                )

                # Classify the pose as suspicious or normal.
                prediction = suspicious_activity_detection(keypoints_input)[0]['label']
                color = (0, 0, 255) if prediction == "Suspicious" else (0, 255, 0)  # BGR
                label = 'Suspicious' if prediction == "Suspicious" else 'Normal'

                # Annotate the frame.
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            else:
                print("No keypoints found for detected person.")
    return frame


def live_detection(frame):
    # Gradio delivers RGB frames; convert to BGR so OpenCV's color
    # conventions hold while drawing, then back to RGB for display.
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    processed_frame = process_frame(frame)
    return cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)


# Gradio UI (Gradio 4.x: `sources` replaces the old `source`/`tool` arguments).
interface = gr.Interface(
    fn=live_detection,
    inputs=gr.Image(sources=["webcam"], streaming=True, type="numpy"),
    outputs=gr.Image(type="numpy", label="Processed Video Stream"),
    live=True,
    description="Real-time Suspicious Activity Detection",
)

if __name__ == "__main__":
    interface.launch(server_name="0.0.0.0", server_port=7860)
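
# ---------------------------------------------------------------------------
# Sketch: obtaining pose keypoints via the ultralytics API.
#
# Because a standard transformers object-detection pipeline does not return
# keypoints, one way to get them is to run the pose checkpoint through the
# `ultralytics` package instead. This is a minimal sketch and is not wired
# into the Gradio loop above; the checkpoint name "yolov8n-pose.pt" and the
# helper name are assumptions for illustration only.
# ---------------------------------------------------------------------------
def extract_keypoints_ultralytics(frame):
    """Return one flat 'x y x y ...' string per detected person (sketch)."""
    from ultralytics import YOLO  # deferred import: optional dependency

    model = YOLO("yolov8n-pose.pt")  # in real use, load once and cache
    strings = []
    for result in model(frame, verbose=False):
        if result.keypoints is None:
            continue
        for person_kpts in result.keypoints.xy:  # shape: (num_keypoints, 2)
            flat = person_kpts.flatten().tolist()
            strings.append(" ".join(str(round(v, 1)) for v in flat))
    return strings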
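
# Usage (assuming this file is saved as app.py):
#     python app.py
# then open http://localhost:7860 in a browser and grant webcam access.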