# --------------------------------------------------------
# YOLOv12 Streamlit App with Emoji-Powered UI 😎🚀
# Based on yolov10: https://github.com/THU-MIG/yolov10/app.py
# --------------------------------------------------------
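#
# Quick-start sketch (the dependency set and file name below are assumptions;
# adjust them to match your environment and the upstream repo's requirements):
#
#   pip install streamlit ultralytics opencv-python pillow
#   streamlit run app.py
#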

import streamlit as st
import cv2
import tempfile
from ultralytics import YOLO
from PIL import Image
import os

# Page config with a cool vibe
st.set_page_config(
    page_title="YOLOv12 Detector 🕵️‍♂️",
    page_icon="🔍",
    layout="wide"
)

def yolov12_inference(uploaded_file, model_id, image_size, conf_threshold, input_type):
    """The magic happens here โœจ"""
    model = YOLO(model_id)
    
    if input_type == "Image" and uploaded_file:
        with st.spinner("🖼️ Painting detections..."):
            image = Image.open(uploaded_file)
            results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
            annotated_image = results[0].plot()
            # plot() returns a BGR array; flip the channels to RGB for st.image
            return annotated_image[:, :, ::-1], None
            
    elif input_type == "Video" and uploaded_file:
        with st.spinner("🎥 Cooking up a detected video..."):
            # Persist the upload to disk so OpenCV can read it
            # (NamedTemporaryFile replaces the deprecated tempfile.mktemp)
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_in:
                tmp_in.write(uploaded_file.read())
                video_path = tmp_in.name

            cap = cv2.VideoCapture(video_path)
            fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back to 30 FPS if the container reports none
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_out:
                output_video_path = tmp_out.name
            # 'mp4v' works with stock OpenCV; browsers may need an H.264 ('avc1') build for inline playback
            out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
                annotated_frame = results[0].plot()
                out.write(annotated_frame)
                frame_count += 1
                if frame_count % 30 == 0:
                    st.text(f"🍳 Processed {frame_count} frames...")

            cap.release()
            out.release()
            
            if os.path.exists(video_path):
                os.remove(video_path)
                
            return None, output_video_path
            
    return None, None

def main():
    # Header with flair
    st.title("YOLOv12: Object Detection Superhero 🦸‍♂️")
    st.markdown("Powered by attention-centric YOLOv12 🌌 | [arXiv 📜](https://arxiv.org/abs/2502.12524) | [GitHub 🐙](https://github.com/sunsmarterjie/yolov12)")
    
    # Layout in two columns
    col1, col2 = st.columns([1, 1])
    
    with col1:
        st.subheader("🎮 Control Room")
        
        # Upload section
        uploaded_file = st.file_uploader(
            "Drop your file here ๐Ÿ“ฅ - Images or Videos welcome!",
            type=['jpg', 'jpeg', 'png', 'mp4'],
            help="Upload an image or video to detect objects in!"
        )
        
        # Input type selector
        input_type = st.radio(
            "What's your flavor? ๐Ÿฆ",
            ("Image", "Video"),
            help="Tell me if it's a still or moving picture!"
        )
        
        # Model selection
        model_id = st.selectbox(
            "Pick your YOLO weapon โš”๏ธ",
            ["yolov12n.pt", "yolov12s.pt", "yolov12m.pt", "yolov12l.pt", "yolov12x.pt"],
            index=2,
            help="Choose your model power level: n (nano) to x (extra spicy)!"
        )
        
        # Image size slider
        image_size = st.slider(
            "Zoom level ๐Ÿ”",
            min_value=320,
            max_value=1280,
            value=640,
            step=32,
            help="Bigger numbers = sharper eyes (but slower)!"
        )
        
        # Confidence threshold
        conf_threshold = st.slider(
            "Certainty meter ๐ŸŽฏ",
            min_value=0.0,
            max_value=1.0,
            value=0.25,
            step=0.05,
            help="How sure should I be? Higher = pickier!"
        )
        
        # The big red button
        if st.button("Detect Objects! 🚀", help="Click to unleash the detection magic!"):
            if uploaded_file is None:
                st.error("Yo! Upload something first 🙈")
            else:
                annotated_image, annotated_video = yolov12_inference(
                    uploaded_file, model_id, image_size, conf_threshold, input_type
                )
                st.session_state['results'] = (annotated_image, annotated_video)
    
    with col2:
        st.subheader("🖥️ Detection HQ")
        
        # Display results
        if 'results' in st.session_state:
            annotated_image, annotated_video = st.session_state['results']
            
            if input_type == "Image" and annotated_image is not None:
                st.image(
                    annotated_image,
                    caption="Your Detected Masterpiece 🎨",
                    use_column_width=True
                )
            elif input_type == "Video" and annotated_video is not None:
                # Cache the rendered video bytes so later reruns can still play
                # it after the temporary file has been removed
                if os.path.exists(annotated_video):
                    with open(annotated_video, "rb") as f:
                        st.session_state['video_bytes'] = f.read()
                    os.remove(annotated_video)
                if st.session_state.get('video_bytes'):
                    st.video(
                        st.session_state['video_bytes'],
                        format="video/mp4",
                        start_time=0
                    )
            else:
                st.warning("Nothing to show yet! Hit the button! ⚡")
        else:
            st.info("Awaiting your command, captain! 🖖 Upload and detect to see results!")
    
    # Footer with sass
    st.markdown("---")
    st.markdown("Built with ๐Ÿ’– by xAI's minions | Objects beware, YOLOv12 is here! ๐Ÿ˜ˆ")

if __name__ == '__main__':
    main()