# --------------------------------------------------------
# YOLOv12 Streamlit App with Emoji-Powered UI 😎🚀
# Based on yolov10: https://github.com/THU-MIG/yolov10/app.py
# --------------------------------------------------------

import streamlit as st
import cv2
import tempfile
from ultralytics import YOLO
from PIL import Image
import os

# Browser-tab title/icon and a full-width layout for the whole app.
st.set_page_config(layout="wide", page_icon="🔍", page_title="YOLOv12 Detector 🕵️‍♂️")

def _save_upload_to_temp(uploaded_file, suffix):
    """Copy the upload's bytes into a freshly created temp file; return its path."""
    # NamedTemporaryFile creates the file itself, unlike tempfile.mktemp, which
    # only hands back a name that another process could claim first.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.read())
        return tmp.name


def _remove_if_exists(path):
    """Delete a temp file, treating an already-missing file as success."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def yolov12_inference(uploaded_file, model_id, image_size, conf_threshold, input_type):
    """Run YOLO detection on an uploaded image or video.

    Args:
        uploaded_file: The uploaded file object. It is opened with PIL for
            images and read with ``read()`` for videos. A falsy value means
            there is nothing to process.
        model_id: Weights identifier passed to ``YOLO``.
        image_size: Inference size forwarded to ``predict`` as ``imgsz``.
        conf_threshold: Confidence threshold forwarded to ``predict`` as ``conf``.
        input_type: ``"Image"`` or ``"Video"``.

    Returns:
        A ``(annotated_image, annotated_video_path)`` tuple:

        * ``(array, None)`` for an image, where ``array`` is the plotted
          result with its channel axis reversed for display.
        * ``(None, path)`` for a video, where ``path`` is a temp ``.mp4``
          file that the caller is responsible for deleting.
        * ``(None, None)`` when there is no upload, the input type is not
          recognised, or the video could not be opened.
    """
    # Bail out before loading the model when there is nothing to do.
    if not uploaded_file or input_type not in ("Image", "Video"):
        return None, None

    model = YOLO(model_id)

    if input_type == "Image":
        with st.spinner("🖼️ Painting detections..."):
            image = Image.open(uploaded_file)
            results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
            annotated_image = results[0].plot()
            # Reverse the channel axis of the plotted array before display.
            return annotated_image[:, :, ::-1], None

    with st.spinner("🎥 Cooking up a detected video..."):
        video_path = _save_upload_to_temp(uploaded_file, ".mp4")
        output_video_path = None
        cap = None
        out = None
        finished = False
        try:
            # mkstemp reserves the output name safely; close the descriptor so
            # the video writer can reopen the path itself.
            fd, output_video_path = tempfile.mkstemp(suffix=".mp4")
            os.close(fd)

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                st.error("Couldn't open that video 😵 Is it a valid MP4?")
                return None, None

            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                # Covers 0, negative and NaN rates reported by the container.
                fps = 30.0
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            out = cv2.VideoWriter(
                output_video_path,
                cv2.VideoWriter_fourcc(*'mp4v'),
                fps,
                (frame_width, frame_height),
            )

            # One placeholder that is updated in place, so progress messages
            # do not pile up on the page.
            progress = st.empty()
            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
                out.write(results[0].plot())
                frame_count += 1
                if frame_count % 30 == 0:
                    progress.text(f"🍳 Processed {frame_count} frames...")

            finished = True
        finally:
            # Always release handles and drop the input copy, even when
            # inference raises part-way through the video.
            if cap is not None:
                cap.release()
            if out is not None:
                out.release()
            _remove_if_exists(video_path)
            if not finished and output_video_path is not None:
                _remove_if_exists(output_video_path)

        return None, output_video_path

def main():
    """Render the detector page: controls on the left, results on the right.

    The left column collects the upload, the input type, the model, the
    inference size and the confidence threshold, and runs detection when the
    button is pressed. The outcome is kept in ``st.session_state['results']``
    as ``(annotated_image, annotated_video_bytes)``. An annotated video is
    stored as bytes, not as a file path, so it can be shown again on later
    reruns after its temp file has been removed.
    """
    # Header with flair
    st.title("YOLOv12: Object Detection Superhero 🦸‍♂️")
    st.markdown("Powered by xAI's cosmic tech 🌌 | [arXiv 📜](https://arxiv.org/abs/2502.12524) | [GitHub 🐙](https://github.com/sunsmarterjie/yolov12)")

    # Layout in two columns
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("🎮 Control Room")

        # Upload section
        uploaded_file = st.file_uploader(
            "Drop your file here 📥 - Images or Videos welcome!",
            type=['jpg', 'jpeg', 'png', 'mp4'],
            help="Upload an image or video to detect objects in!"
        )

        # Input type selector
        input_type = st.radio(
            "What's your flavor? 🍦",
            ("Image", "Video"),
            help="Tell me if it's a still or moving picture!"
        )

        # Model selection
        model_id = st.selectbox(
            "Pick your YOLO weapon ⚔️",
            ["yolov12n.pt", "yolov12s.pt", "yolov12m.pt", "yolov12l.pt", "yolov12x.pt"],
            index=2,
            help="Choose your model power level: n (nano) to x (extra spicy)!"
        )

        # Image size slider
        image_size = st.slider(
            "Zoom level 🔍",
            min_value=320,
            max_value=1280,
            value=640,
            step=32,
            help="Bigger numbers = sharper eyes (but slower)!"
        )

        # Confidence threshold
        conf_threshold = st.slider(
            "Certainty meter 🎯",
            min_value=0.0,
            max_value=1.0,
            value=0.25,
            step=0.05,
            help="How sure should I be? Higher = pickier!"
        )

        # The big red button
        if st.button("Detect Objects! 🚀", help="Click to unleash the detection magic!"):
            if uploaded_file is None:
                st.error("Yo! Upload something first 🙈")
            else:
                annotated_image, annotated_video = yolov12_inference(
                    uploaded_file, model_id, image_size, conf_threshold, input_type
                )
                # Load the rendered video into memory once and delete the temp
                # file right away. Keeping only the path would break on the
                # next rerun, when the file no longer exists.
                video_bytes = None
                if annotated_video is not None and os.path.isfile(annotated_video):
                    try:
                        with open(annotated_video, "rb") as video_file:
                            video_bytes = video_file.read()
                    finally:
                        os.remove(annotated_video)
                st.session_state['results'] = (annotated_image, video_bytes)

    with col2:
        st.subheader("🖥️ Detection HQ")

        # Display results
        if 'results' in st.session_state:
            annotated_image, video_bytes = st.session_state['results']

            if input_type == "Image" and annotated_image is not None:
                # NOTE(review): recent Streamlit releases deprecate
                # use_column_width; confirm the pinned version before
                # switching to its replacement.
                st.image(
                    annotated_image,
                    caption="Your Detected Masterpiece 🎨",
                    use_column_width=True
                )
            elif input_type == "Video" and video_bytes is not None:
                st.video(
                    video_bytes,
                    format="video/mp4",
                    start_time=0
                )
            else:
                st.warning("Nothing to show yet! Hit the button! ⚡")
        else:
            st.info("Awaiting your command, captain! 🖖 Upload and detect to see results!")

    # Footer with sass
    st.markdown("---")
    st.markdown("Built with 💖 by xAI's minions | Objects beware, YOLOv12 is here! 😈")

# Build the page only when this file is executed as the entry script.
if "__main__" == __name__:
    main()