# Created by yarramsettinaresh GORAKA DIGITAL PRIVATE LIMITED at 01/11/24
import gradio as gr
import cv2
import time
from ultralytics import YOLO
import numpy as np

# Load the YOLO model
model_path = "model_- 11 october 2024 11_07.pt"
model = YOLO(model_path)

# Initialize global video capture variable
cap = None


def ultralytics_predict(model, frame):
    confidence_threshold = 0.2
    start_time = time.time()
    results = model(frame)  # Perform inference on the frame
    end_time = time.time()
    duration = end_time - start_time
    print(f"Prediction duration: {duration:.4f} seconds")
    duration_str = f"{duration:.4f} s"
    object_count = {}  # Dictionary to store counts of detected objects

    for detection in results[0].boxes:  # Iterate through detections
        conf = float(detection.conf[0])  # Confidence score
        if conf > confidence_threshold:
            conf, pos, text, color = ultralytics(detection, duration_str)
            cv2.rectangle(frame, pos[0], pos[1], color, 2)
            cv2.putText(frame, text, (pos[0][0], pos[0][1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Update object count
            class_id = int(detection.cls[0])
            class_name = model.names[class_id]
            if class_name not in object_count:
                object_count[class_name] = dict(count=0)
            object_mapp = object_count[class_name]
            object_mapp["count"] = object_mapp.get("count", 0) + 1

    y_offset = 150  # Initial y-offset for the text position
    text_x = frame.shape[1] - 300  # X position for the text
    for class_name, data in object_count.items():
        count_text = f"{class_name}: {data['count']}"

        # Get text size for rectangle dimensions
        (text_width, text_height), _ = cv2.getTextSize(count_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
        rect_x1, rect_y1 = text_x - 10, y_offset - text_height - 10
        rect_x2, rect_y2 = text_x + text_width + 10, y_offset + 10

        # Draw semi-transparent rectangle as background
        overlay = frame.copy()
        cv2.rectangle(overlay, (rect_x1, rect_y1), (rect_x2, rect_y2), (0, 255, 0), -1)  # Green rectangle
        alpha = 0.5  # Opacity level (0 = transparent, 1 = opaque)
        cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

        # Draw red text on top of the rectangle
        cv2.putText(frame, count_text, (text_x, y_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        y_offset += 40  # Increase y-offset for the next class count

    return frame


def ultralytics(detection, duration):
    COLOUR_MAP = {
        0: (0, 0, 255),  # Red in BGR format
        1: (0, 255, 0)   # Green in BGR format
    }
    conf = float(detection.conf[0])  # Confidence score
    class_id = int(detection.cls[0])  # Class ID
    name = model.names[class_id]  # Get class name
    xmin, ymin, xmax, ymax = map(int, detection.xyxy[0])  # Bounding box coordinates
    color = COLOUR_MAP.get(class_id, (255, 255, 255))  # Default to white if not found

    # Bounding box corners and label text used by the caller to draw on the frame
    pos = (xmin, ymin), (xmax, ymax)
    text = f"{name} {round(conf, 2)} :{duration}"
    return conf, pos, text, color


def process_frame():
    global cap
    ret, frame = cap.read()
    if not ret:
        cap.release()  # Release the video capture if no frame is captured
        return None
    frame = ultralytics_predict(model, frame)
    return frame  # Return the annotated frame


def gradio_video_stream(video_file):
    print(f"gradio_video_stream init : {video_file}")
    global cap
    cap = cv2.VideoCapture(video_file)
    while True:
        frame = process_frame()
        if frame is None:
            break
        if isinstance(frame, np.ndarray):  # Check if frame is a valid numpy array
            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            print("Invalid frame format")


iface = gr.Interface(
    fn=gradio_video_stream,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Image(),
).launch()