import cv2
import random
from ultralytics import YOLO
from gtts import gTTS
from datetime import datetime, timedelta
import gradio as gr

# Load YOLOv8 model
# Created once at module load and reused by every process_frame call.
yolo = YOLO("yolov8n.pt")

# Audio alert settings
# Labels that may trigger a spoken alert when audio alerts are enabled.
# NOTE(review): entries are compared against the model's result.names; confirm
# that the loaded weights actually emit every label listed here (e.g. "fire",
# "gun"), otherwise those entries can never match.
alert_categories = {"person", "cat", "dog", "knife", "fire", "gun"}
# Maps a label to the datetime of its most recent alert; filled by process_frame.
last_alert_time = {}
# Minimum time that must pass before the same label is announced again.
alert_cooldown = timedelta(seconds=10)

# Create audio alert as downloadable file
def generate_audio_alert(label, position):
    """Synthesize a spoken warning for a detected object and save it as an MP3.

    Args:
        label: Name of the detected object; inserted into the spoken phrase.
        position: Side of the frame the object is on (e.g. "left" or "right").

    Returns:
        Path of the MP3 file, written relative to the current working
        directory. Each call produces a distinct file name.
    """
    import uuid  # local import: only this function needs it

    phrases = [
        f"Be careful, there's a {label} on your {position}.",
        f"Watch out! {label} detected on your {position}.",
        f"Alert! A {label} is on your {position}.",
    ]
    caution_note = random.choice(phrases)

    # A timestamp with one-second resolution is not unique: two alerts raised
    # within the same second (e.g. two different labels in one frame) would
    # share a file name and the second would overwrite the first. Microseconds
    # plus a random suffix keep every alert in its own file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    temp_file_path = f"audio_alert_{timestamp}_{uuid.uuid4().hex[:8]}.mp3"

    tts = gTTS(caution_note)
    tts.save(temp_file_path)
    return temp_file_path

# Process a single frame
def process_frame(image, enable_audio):
    """Run YOLO detection on one frame, draw the detections, collect alerts.

    Args:
        image: Frame as an array whose ``shape[1]`` is its width. It is
            annotated in place (boxes and labels are drawn onto it).
        enable_audio: When truthy, an audio alert is generated for each
            detected label in ``alert_categories`` that is not within its
            cooldown period.

    Returns:
        A tuple ``(image, audio_files)``: the same frame object with the
        detections drawn on it, and a list (possibly empty) of paths to the
        MP3 alerts generated for this frame.
    """
    result = yolo(image)[0]
    audio_files = []

    # The frame width does not change between boxes, so compute it once.
    frame_center_x = image.shape[1] // 2

    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        label = result.names[int(box.cls[0])]

        if enable_audio and label in alert_categories:
            obj_center_x = (x1 + x2) // 2
            position = "left" if obj_center_x < frame_center_x else "right"

            current_time = datetime.now()
            previous_alert = last_alert_time.get(label)
            # Alert the first time a label is seen, then at most once per
            # cooldown window for that label.
            if (
                previous_alert is None
                or current_time - previous_alert > alert_cooldown
            ):
                audio_files.append(generate_audio_alert(label, position))
                last_alert_time[label] = current_time

        # Draw bounding boxes
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the text baseline inside the frame: for a box touching the top
        # edge, y1 - 10 would be negative and the label would be cut off.
        text_y = max(y1 - 10, 15)
        cv2.putText(
            image,
            label,
            (x1, text_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    return image, audio_files

# Gradio interface function
def object_detection_webcam(enable_audio):
    """Stream annotated webcam frames and audio alerts to the Gradio UI.

    This is a generator: it opens camera 0 and, for every captured frame,
    yields ``(processed_frame, audio_files)`` - one value per output
    component, in order (image first, files second).

    Args:
        enable_audio: Passed through to ``process_frame``; when truthy, audio
            alerts are generated for alert-worthy detections.

    Yields:
        Tuple of the annotated RGB frame and a list of MP3 alert paths.

    Raises:
        gr.Error: If the camera cannot be opened or a frame cannot be read.
    """
    cap = cv2.VideoCapture(0)
    try:
        # In a generator a ``return "message"`` is discarded, so failures are
        # raised as gr.Error to make them visible in the UI.
        if not cap.isOpened():
            raise gr.Error("Error: Unable to access the camera.")

        while True:
            ret, frame = cap.read()
            if not ret:
                raise gr.Error("Error: Unable to read from camera.")

            # OpenCV captures BGR; convert to RGB before processing/display.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame, audio_files = process_frame(frame, enable_audio)

            # Yield positionally: a dict with string keys is not mapped onto
            # the output components.
            yield processed_frame, audio_files
    finally:
        # Runs on error and when the generator is closed, so the camera
        # device is always released.
        cap.release()

# Gradio UI
def gradio_app():
    """Assemble the Gradio interface for the webcam detector.

    Returns:
        A ``gr.Interface`` that calls ``object_detection_webcam`` with the
        state of an "Enable Audio Alerts" checkbox and shows a processed
        frame plus downloadable audio alert files. The interface is live.
    """
    # Single input: the toggle that switches audio alerts on or off.
    audio_toggle = gr.Checkbox(label="Enable Audio Alerts", value=False)

    # Two outputs: the annotated frame and the generated alert files.
    frame_view = gr.Image(label="Processed Frame")
    alert_files = gr.File(label="Audio Alerts")

    interface = gr.Interface(
        fn=object_detection_webcam,
        inputs=[audio_toggle],
        outputs=[frame_view, alert_files],
        live=True,
    )
    return interface

# Build the interface and start the Gradio server. This is a bare top-level
# statement, so it runs whenever the module is executed or imported.
gradio_app().launch()