"""Webcam object detection with optional spoken alerts, served via Gradio."""

import random
from datetime import datetime, timedelta

import cv2
import gradio as gr
from gtts import gTTS
from ultralytics import YOLO

# Load YOLOv8 model
yolo = YOLO("yolov8n.pt")

# Audio alert settings
alert_categories = {"person", "cat", "dog", "knife", "fire", "gun"}
# Maps a label to the datetime of the most recent alert generated for it.
last_alert_time = {}
# Minimum time between two alerts for the same label.
alert_cooldown = timedelta(seconds=10)


def generate_audio_alert(label: str, position: str) -> str:
    """Synthesize a spoken warning and save it as an MP3 file.

    One of three phrasings is picked at random, converted to speech with
    gTTS, and written to the current working directory.

    Args:
        label: Name of the detected object.
        position: Side of the frame the object is on ("left" or "right").

    Returns:
        Path of the MP3 file that was written.
    """
    phrases = [
        f"Be careful, there's a {label} on your {position}.",
        f"Watch out! {label} detected on your {position}.",
        f"Alert! A {label} is on your {position}.",
    ]
    caution_note = random.choice(phrases)

    # Include the label and microseconds in the name: several alerts can be
    # generated for the same frame, and a seconds-only timestamp would make
    # them overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    safe_label = label.replace(" ", "_")
    temp_file_path = f"audio_alert_{safe_label}_{timestamp}.mp3"

    gTTS(caution_note).save(temp_file_path)
    return temp_file_path


def process_frame(image, enable_audio: bool):
    """Run detection on one frame, draw the boxes, and create due alerts.

    Args:
        image: Frame array; ``image.shape[1]`` is used as the frame width.
            The array is annotated in place.
        enable_audio: When true, an audio alert is generated for each
            detected label in ``alert_categories`` whose cooldown expired.

    Returns:
        A ``(image, audio_files)`` tuple: the annotated frame and the list of
        MP3 paths generated for this frame (empty if none).
    """
    result = yolo(image)[0]
    audio_files = []
    frame_center_x = image.shape[1] // 2

    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        label = result.names[int(box.cls[0])]

        if enable_audio and label in alert_categories:
            obj_center_x = (x1 + x2) // 2
            position = "left" if obj_center_x < frame_center_x else "right"

            current_time = datetime.now()
            previous = last_alert_time.get(label)
            # Alert on first sighting, or once the per-label cooldown passed.
            if previous is None or current_time - previous > alert_cooldown:
                audio_files.append(generate_audio_alert(label, position))
                last_alert_time[label] = current_time

        # Draw bounding boxes
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the caption inside the image when the box touches the top edge.
        text_y = max(y1 - 10, 15)
        cv2.putText(
            image,
            label,
            (x1, text_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    return image, audio_files


def object_detection_webcam(enable_audio: bool):
    """Stream annotated webcam frames and any generated audio alerts.

    This is a generator: each iteration reads one frame from camera 0,
    converts it from BGR to RGB, processes it, and yields the outputs.

    Args:
        enable_audio: Forwarded to ``process_frame``.

    Yields:
        ``(processed_frame, audio_files)`` tuples, in the order of the
        interface's two output components.

    Raises:
        gr.Error: If the camera cannot be opened or a frame cannot be read.
    """
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            # A plain ``return "..."`` inside a generator is discarded, so
            # the failure is raised for Gradio to display instead.
            raise gr.Error("Error: Unable to access the camera.")

        while True:
            ret, frame = cap.read()
            if not ret:
                raise gr.Error("Error: Unable to read from camera.")

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame, audio_files = process_frame(frame, enable_audio)
            yield processed_frame, audio_files
    finally:
        # Runs on error and when the generator is closed, so the camera is
        # always released.
        cap.release()


def gradio_app():
    """Build the Gradio interface wired to ``object_detection_webcam``."""
    return gr.Interface(
        fn=object_detection_webcam,
        inputs=[gr.Checkbox(label="Enable Audio Alerts", value=False)],
        outputs=[
            gr.Image(label="Processed Frame"),
            gr.File(label="Audio Alerts"),
        ],
        live=True,
    )


if __name__ == "__main__":
    gradio_app().launch()