import cv2 import gradio as gr import supervision as sv from ultralytics import YOLO from PIL import Image import torch import time import numpy as np import uuid import spaces ver=[0,0,0,0,0,0,6,7,8,9,10,11] ltr=["n","s","m","1","x"] tsk=["","-seg","-pose","-obb","-cls"] #yolov8s.pt modin=f"yolov{ver[9]}{ltr[1]}{tsk[0]}.pt" model = YOLO(modin) annotators = ["Box","RoundBox","BoxCorner","Color", "Circle","Dot","Triangle","Elipse","Halo", "PercentageBar","Mask","Polygon","Label", "RichLabel","Icon","Crop","Blur","Pixelate","HeatMap"] @spaces.GPU def stream_object_detection(video): SUBSAMPLE=1 cap = cv2.VideoCapture(video) # This means we will output mp4 videos video_codec = cv2.VideoWriter_fourcc(*"mp4v") # type: ignore fps = int(cap.get(cv2.CAP_PROP_FPS)) desired_fps = fps // SUBSAMPLE width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2 height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2 iterating, frame = cap.read() n_frames = 0 output_video_name = f"output_{uuid.uuid4()}.mp4" output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height)) # type: ignore while iterating: frame = cv2.resize( frame, (0,0), fx=0.5, fy=0.5) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) result = model(Image.fromarray(frame))[0] detections = sv.Detections.from_ultralytics(result) print(detections) box_annotator = eval(f'sv.{annotators[0]}Annotator()') outp = box_annotator.annotate( scene=frame.copy(), detections=detections) #outp = draw_box(frame,detections) frame = np.array(outp) # Convert RGB to BGR frame = frame[:, :, ::-1].copy() output_video.write(frame) batch = [] output_video.release() yield output_video_name,detections output_video_name = f"output_{uuid.uuid4()}.mp4" output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height)) # type: ignore iterating, frame = cap.read() n_frames += 1 with gr.Blocks() as app: gr.HTML("