Spaces:

tree3po
/

supervision

Sleeping

File size: 3,680 Bytes

e751200
 
 
 
 
 
 
 
 
2d673aa
c269db1
18d2d1f
c269db1
80826f2
10cd0ae
 
 
 
747fbed
 
 
 
 
 
2b85a9e
2d673aa
747fbed
 
e751200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
747fbed
e4dfe6c
747fbed
 
10cd0ae
e4dfe6c
 
 
 
 
e751200
 
 
 
 
 
718ddba
e751200
 
 
 
747fbed
 
 
e751200
 
 
747fbed
e751200
747fbed
 
 
 
 
 
 
 
 
2b85a9e
747fbed

import cv2
import gradio as gr
import supervision as sv
from ultralytics import YOLO
from PIL import Image
import torch
import time
import numpy as np
import uuid
import spaces

# Supported YOLO major versions. The list is zero-padded so that ver[n] == n
# for the real entries; the UI offers ver[6:] and defaults to ver[9].
ver=[0,0,0,0,0,0,6,7,8,9,10,11]
# Model size letters used in Ultralytics weight names (nano .. extra large).
# The fourth entry is the letter "l" (large), not the digit one.
ltr=["n","s","m","l","x"]
# Task suffixes appended to the weights name ("" selects plain detection).
tsk=["","-seg","-pose","-obb","-cls"]
# Annotator names; each must match a supervision class called
# ``sv.<name>Annotator`` because the name is resolved dynamically at run time.
annotators = ["Box","RoundBox","BoxCorner","Color",
              "Circle","Dot","Triangle","Ellipse","Halo",
              "PercentageBar","Mask","Polygon","Label",
              "RichLabel","Icon","Crop","Blur","Pixelate","HeatMap"]
def _weights_filename(v, l, t):
    """Return the Ultralytics weights file name for version/size/task."""
    # Ultralytics dropped the "v" from the name starting with YOLO11
    # ("yolo11n.pt"); earlier releases keep it ("yolov8n.pt").
    try:
        drop_v = int(v) >= 11
    except (TypeError, ValueError):
        # Free-form custom value from the dropdown: keep the legacy pattern.
        drop_v = False
    prefix = "yolo" if drop_v else "yolov"
    return f"{prefix}{v}{l}{t}.pt"


def model_select(v,l,t):
    """Load the YOLO weights chosen in the UI into the global ``model``.

    Args:
        v: YOLO major version (e.g. ``9``).
        l: Model size letter (e.g. ``"s"``).
        t: Task suffix (e.g. ``"-seg"``), or ``""`` for the default task.
    """
    modin = _weights_filename(v, l, t)
    print(modin)
    # The loaded model is shared with the detection callback through a
    # module-level global.
    global model
    model = YOLO(modin)


@spaces.GPU
def stream_object_detection(video,anno):
    """Annotate a video frame by frame and stream the results.

    Each frame is downscaled to half size, run through the global ``model``
    (set by ``model_select``), annotated with the chosen supervision
    annotator and written to its own single-frame mp4 file.

    Args:
        video: Path of the input video, as accepted by ``cv2.VideoCapture``.
        anno: Annotator name without the ``Annotator`` suffix, e.g. ``"Box"``
            for ``sv.BoxAnnotator``.

    Yields:
        ``(output_video_name, detections)`` per processed frame: the path of
        the mp4 file just written and the ``sv.Detections`` for that frame.

    Raises:
        gr.Error: If no video was supplied or ``anno`` does not name a
            supervision annotator.
    """
    # Only the output frame rate is divided by SUBSAMPLE; every input frame
    # is still processed.
    SUBSAMPLE=2

    if not video:
        raise gr.Error("Please provide a video first.")

    # Resolve the annotator class by attribute lookup instead of eval(): the
    # name comes from the UI and must never be executed as code.
    annotator_cls = getattr(sv, f"{anno}Annotator", None)
    if annotator_cls is None:
        raise gr.Error(f"Unknown annotator: {anno}")
    # Built once for the whole video, so annotators that keep state between
    # calls see every frame instead of being reset each time.
    box_annotator = annotator_cls()

    cap = cv2.VideoCapture(video)
    try:
        # This means we will output mp4 videos
        video_codec = cv2.VideoWriter_fourcc(*"mp4v") # type: ignore
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        # Guard against a zero frame rate when the source reports < 2 fps.
        desired_fps = max(1, fps // SUBSAMPLE)
        width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2

        iterating, frame = cap.read()
        while iterating:
            # Resize to the exact size given to the writer. Scaling by 0.5
            # rounds, which can differ from ``// 2`` by one pixel on odd
            # dimensions, and the writer needs frames of its declared size.
            frame = cv2.resize(frame, (width, height))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = model(Image.fromarray(frame))[0]
            detections = sv.Detections.from_ultralytics(result)

            outp = box_annotator.annotate(
                scene=frame.copy(),
                detections=detections)

            # Convert RGB back to BGR for OpenCV's writer.
            frame = np.array(outp)[:, :, ::-1].copy()

            # One writer per frame, opened only when there is a frame to
            # write, so no empty trailing file is left behind.
            output_video_name = f"output_{uuid.uuid4()}.mp4"
            output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height)) # type: ignore
            try:
                output_video.write(frame)
            finally:
                output_video.release()

            yield output_video_name,detections
            iterating, frame = cap.read()
    finally:
        # Runs on normal completion, on errors, and when the consumer stops
        # iterating early.
        cap.release()
# Static page header: title, subtitle and a link to the supervision repo.
HEADER_HTML = (
    "<div style='font-size: 50px;font-weight: 800;'>SuperVision</div>"
    "<div style='font-size: 30px;'>Video Object Detection</div>"
    "<div>Github:<a href='https://github.com/roboflow/supervision' target='_blank'>"
    "https://github.com/roboflow/supervision</a></div>"
)

with gr.Blocks(theme="Nymbo/Nymbo_Theme_5") as app:
    gr.HTML(HEADER_HTML)
    with gr.Row():
        # Left column: input video, run button and the model/annotator controls.
        with gr.Column():
            inp = gr.Video(height=300)
            btn = gr.Button()
            with gr.Accordion("Controls",open=False):
                with gr.Group():
                    dd1=gr.Dropdown(label="Version",choices=ver[6:],value=ver[9],allow_custom_value=True)
                    dd2=gr.Dropdown(label="Ltr", choices=ltr,value=ltr[1],allow_custom_value=True)
                    dd3=gr.Dropdown(label="Task",choices=tsk,value=tsk[0],allow_custom_value=True)
                    dd4=gr.Dropdown(label="Annotator",choices=annotators,value="Box")
        # Right column: streamed annotated video plus the per-frame detections.
        with gr.Column():
            outp_v = gr.Video(label="Processed Video", streaming=True, autoplay=True,height=300)
            outp_j = gr.JSON()

    # Run detection on the uploaded video with the selected annotator.
    btn.click(fn=stream_object_detection, inputs=[inp,dd4], outputs=[outp_v,outp_j])

    # Load the model once on page load, then again whenever any of the three
    # model selectors changes.
    model_inputs = (dd1, dd2, dd3)
    app.load(model_select, list(model_inputs), None)
    for selector in model_inputs:
        selector.change(model_select, list(model_inputs), None)
app.queue().launch()