# --------------------------------------------------------
# YOLOv12 Streamlit App with Emoji-Powered UI ๐๐
# Based on yolov10: https://github.com/THU-MIG/yolov10/app.py
# --------------------------------------------------------
import streamlit as st
import cv2
import tempfile
from ultralytics import YOLO
from PIL import Image
import os
# Page config with a cool vibe
# Configure the page before any other Streamlit call (required by Streamlit):
# wide layout so the control/result columns get room to breathe.
st.set_page_config(page_title="YOLOv12 Detector ๐ต๏ธโโ๏ธ",
                   page_icon="๐",
                   layout="wide")
def yolov12_inference(uploaded_file, model_id, image_size, conf_threshold, input_type):
    """Run YOLO object detection on an uploaded image or video.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` (image or video), or ``None``.
        model_id: Name/path of the YOLO weights file to load (e.g. ``"yolov12m.pt"``).
        image_size: Inference resolution in pixels, passed as ``imgsz``.
        conf_threshold: Minimum confidence for a detection to be kept.
        input_type: Either ``"Image"`` or ``"Video"``; selects the code path.

    Returns:
        Tuple ``(annotated_image, annotated_video_path)``. Exactly one element
        is non-None on success; both are ``None`` when there is nothing to do.
        The caller owns (and must eventually delete) the returned video file.
    """
    model = YOLO(model_id)
    if input_type == "Image" and uploaded_file:
        with st.spinner("๐ผ๏ธ Painting detections..."):
            image = Image.open(uploaded_file)
            results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
            # plot() returns a BGR array; reverse the channel axis to get RGB
            # for display with st.image.
            annotated_image = results[0].plot()
            return annotated_image[:, :, ::-1], None
    elif input_type == "Video" and uploaded_file:
        with st.spinner("๐ฅ Cooking up a detected video..."):
            # NamedTemporaryFile(delete=False) instead of the deprecated,
            # race-prone tempfile.mktemp() (the path is created atomically).
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as src:
                src.write(uploaded_file.read())
                video_path = src.name
            cap = cv2.VideoCapture(video_path)
            # Broken metadata can report 0 fps, which makes VideoWriter emit an
            # unplayable file — fall back to a sane default.
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as dst:
                output_video_path = dst.name
            out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                  fps, (frame_width, frame_height))
            frame_count = 0
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
                    # plot() yields BGR, which is exactly what VideoWriter expects.
                    out.write(results[0].plot())
                    frame_count += 1
                    if frame_count % 30 == 0:
                        st.text(f"๐ณ Processed {frame_count} frames...")
            finally:
                # Release capture/writer and delete the input temp file even if
                # a frame raises mid-loop (the original leaked all three).
                cap.release()
                out.release()
                if os.path.exists(video_path):
                    os.remove(video_path)
            return None, output_video_path
    return None, None
def main():
    """Render the Streamlit UI: controls in the left column, results in the right."""
    # Header with flair
    st.title("YOLOv12: Object Detection Superhero ๐ฆธโโ๏ธ")
    st.markdown("Powered by xAI's cosmic tech ๐ | [arXiv ๐](https://arxiv.org/abs/2502.12524) | [GitHub ๐](https://github.com/sunsmarterjie/yolov12)")

    # Layout in two columns
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("๐ฎ Control Room")
        # Upload section
        uploaded_file = st.file_uploader(
            "Drop your file here ๐ฅ - Images or Videos welcome!",
            type=['jpg', 'jpeg', 'png', 'mp4'],
            help="Upload an image or video to detect objects in!"
        )
        # Input type selector
        input_type = st.radio(
            "What's your flavor? ๐ฆ",
            ("Image", "Video"),
            help="Tell me if it's a still or moving picture!"
        )
        # Model selection
        model_id = st.selectbox(
            "Pick your YOLO weapon โ๏ธ",
            ["yolov12n.pt", "yolov12s.pt", "yolov12m.pt", "yolov12l.pt", "yolov12x.pt"],
            index=2,
            help="Choose your model power level: n (nano) to x (extra spicy)!"
        )
        # Image size slider
        image_size = st.slider(
            "Zoom level ๐",
            min_value=320,
            max_value=1280,
            value=640,
            step=32,
            help="Bigger numbers = sharper eyes (but slower)!"
        )
        # Confidence threshold
        conf_threshold = st.slider(
            "Certainty meter ๐ฏ",
            min_value=0.0,
            max_value=1.0,
            value=0.25,
            step=0.05,
            help="How sure should I be? Higher = pickier!"
        )
        # The big red button
        if st.button("Detect Objects! ๐", help="Click to unleash the detection magic!"):
            if uploaded_file is None:
                st.error("Yo! Upload something first ๐")
            else:
                annotated_image, annotated_video = yolov12_inference(
                    uploaded_file, model_id, image_size, conf_threshold, input_type
                )
                # BUGFIX: the original stored the temp-file *path* in session
                # state and deleted the file right after the first st.video
                # call, so every rerun after that tried to play a nonexistent
                # file. Instead, read the bytes once, delete the temp file
                # here, and keep only the bytes (st.video accepts raw bytes).
                video_bytes = None
                if annotated_video is not None:
                    with open(annotated_video, "rb") as vf:
                        video_bytes = vf.read()
                    if os.path.exists(annotated_video):
                        os.remove(annotated_video)
                st.session_state['results'] = (annotated_image, video_bytes)

    with col2:
        st.subheader("๐ฅ๏ธ Detection HQ")
        # Display results (persisted across reruns via session state)
        if 'results' in st.session_state:
            annotated_image, annotated_video = st.session_state['results']
            if input_type == "Image" and annotated_image is not None:
                st.image(
                    annotated_image,
                    caption="Your Detected Masterpiece ๐จ",
                    use_column_width=True
                )
            elif input_type == "Video" and annotated_video is not None:
                st.video(
                    annotated_video,
                    format="video/mp4",
                    start_time=0
                )
            else:
                st.warning("Nothing to show yet! Hit the button! โก")
        else:
            st.info("Awaiting your command, captain! ๐ Upload and detect to see results!")

    # Footer with sass
    st.markdown("---")
    st.markdown("Built with ๐ by xAI's minions | Objects beware, YOLOv12 is here! ๐")
# Standard script entry point. (Removed a stray trailing "|" extraction
# artifact after main() that made the file a syntax error.)
if __name__ == '__main__':
    main()