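# NOTE: page-scrape residue, not program text: "Spaces: Runtime error"
# (presumably the status banner of the hosting page this file was copied from).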
# Gradio YOLOv5 Det v0.3
# author: Zeng Yifu(曾逸夫)
# creation time: 2022-05-09
# email: zyfiy1314@163.com
# project homepage: https://gitee.com/CV_Lab/gradio_yolov5_det
import os | |
os.system("pip install gradio==2.9.4") | |
import argparse | |
import csv | |
import json | |
import sys | |
from pathlib import Path | |
import pandas as pd | |
import gradio as gr | |
import torch | |
import yaml | |
from PIL import Image, ImageDraw, ImageFont | |
from util.fonts_opt import is_fonts | |
from util.pdf_opt import pdf_generate | |
ROOT_PATH = sys.path[0] # root directory | |
# model path | |
model_path = "ultralytics/yolov5" | |
# Gradio YOLOv5 Det version | |
GYD_VERSION = "Gradio YOLOv5 Det v0.3" | |
# model name temporary variable | |
model_name_tmp = "" | |
# Device temporary variables | |
device_tmp = "" | |
# File extension | |
suffix_list = [".csv", ".yaml"] | |
# font size | |
FONTSIZE = 25 | |
# object style | |
obj_style = ["Small Object", "Medium Object", "Large Object"] | |
def parse_args(known=False): | |
parser = argparse.ArgumentParser(description="Gradio YOLOv5 Det v0.3") | |
parser.add_argument("--source", "-src", default="upload", type=str, help="input source") | |
parser.add_argument("--img_tool", "-it", default="editor", type=str, help="input image tool") | |
parser.add_argument("--model_name", "-mn", default="yolov5s", type=str, help="model name") | |
parser.add_argument( | |
"--model_cfg", | |
"-mc", | |
default="./model_config/model_name_p5_p6_all.yaml", | |
type=str, | |
help="model config", | |
) | |
parser.add_argument( | |
"--cls_name", | |
"-cls", | |
default="./cls_name/cls_name_en.yaml", | |
type=str, | |
help="cls name", | |
) | |
parser.add_argument( | |
"--nms_conf", | |
"-conf", | |
default=0.5, | |
type=float, | |
help="model NMS confidence threshold", | |
) | |
parser.add_argument("--nms_iou", "-iou", default=0.45, type=float, help="model NMS IoU threshold") | |
parser.add_argument( | |
"--device", | |
"-dev", | |
default="cpu", | |
type=str, | |
help="cuda or cpu", | |
) | |
parser.add_argument("--inference_size", "-isz", default=640, type=int, help="model inference size") | |
parser.add_argument("--max_detnum", "-mdn", default="50", type=str, help="model max det num") | |
args = parser.parse_known_args()[0] if known else parser.parse_args() | |
return args | |
# yaml file parsing | |
def yaml_parse(file_path): | |
return yaml.safe_load(open(file_path, encoding="utf-8").read()) | |
# yaml csv file parsing | |
def yaml_csv(file_path, file_tag): | |
file_suffix = Path(file_path).suffix | |
if file_suffix == suffix_list[0]: | |
# model name | |
file_names = [i[0] for i in list(csv.reader(open(file_path)))] # csv version | |
elif file_suffix == suffix_list[1]: | |
# model name | |
file_names = yaml_parse(file_path).get(file_tag) # yaml version | |
else: | |
print(f"{file_path} is not in the correct format! Program exits!") | |
sys.exit() | |
return file_names | |
# model loading | |
def model_loading(model_name, device): | |
# load model | |
model = torch.hub.load( | |
model_path, model_name, force_reload=True, device=device, _verbose=False | |
) | |
return model | |
# check information | |
def export_json(results, model, img_size): | |
return [ | |
[ | |
{ | |
"id": i, | |
"class": int(result[i][5]), | |
# "class_name": model.model.names[int(result[i][5])], | |
"class_name": model_cls_name_cp[int(result[i][5])], | |
"normalized_box": { | |
"x0": round(result[i][:4].tolist()[0], 6), | |
"y0": round(result[i][:4].tolist()[1], 6), | |
"x1": round(result[i][:4].tolist()[2], 6), | |
"y1": round(result[i][:4].tolist()[3], 6),}, | |
"confidence": round(float(result[i][4]), 2), | |
"fps": round(1000 / float(results.t[1]), 2), | |
"width": img_size[0], | |
"height": img_size[1],} for i in range(len(result))] for result in results.xyxyn] | |
# frame conversion | |
def pil_draw(img, countdown_msg, textFont, xyxy, font_size, opt): | |
img_pil = ImageDraw.Draw(img) | |
img_pil.rectangle(xyxy, fill=None, outline="green") # bounding box | |
if "label" in opt: | |
text_w, text_h = textFont.getsize(countdown_msg) # Label size | |
img_pil.rectangle( | |
(xyxy[0], xyxy[1], xyxy[0] + text_w, xyxy[1] + text_h), | |
fill="green", | |
outline="green", | |
) # label background | |
img_pil.multiline_text( | |
(xyxy[0], xyxy[1]), | |
countdown_msg, | |
fill=(205, 250, 255), | |
font=textFont, | |
align="center", | |
) | |
return img | |
# YOLOv5 image detection function | |
def yolo_det(img, device, model_name, inference_size, conf, iou, max_num, model_cls, opt): | |
global model, model_name_tmp, device_tmp | |
# object size num | |
s_obj, m_obj, l_obj = 0, 0, 0 | |
# object area list | |
area_obj_all = [] | |
if model_name_tmp != model_name: | |
# Model judgment to avoid repeated loading | |
model_name_tmp = model_name | |
model = model_loading(model_name_tmp, device) | |
elif device_tmp != device: | |
device_tmp = device | |
model = model_loading(model_name_tmp, device) | |
# -------------Model tuning ------------- | |
model.conf = conf # NMS confidence threshold | |
model.iou = iou # NMS IoU threshold | |
model.max_det = int(max_num) # Maximum number of detection frames | |
model.classes = model_cls # model classes | |
results = model(img, size=inference_size) # detection | |
dataframe = results.pandas().xyxy[0].round(2) | |
img_size = img.size # frame size | |
# ----------------Load fonts---------------- | |
yaml_index = cls_name.index(".yaml") | |
cls_name_lang = cls_name[yaml_index - 2:yaml_index] | |
if cls_name_lang == "zh": | |
# Chinese | |
textFont = ImageFont.truetype(str(f"{ROOT_PATH}/fonts/SimSun.ttf"), size=FONTSIZE) | |
elif cls_name_lang in ["en", "ru", "es", "ar"]: | |
# English, Russian, Spanish, Arabic | |
textFont = ImageFont.truetype(str(f"{ROOT_PATH}/fonts/TimesNewRoman.ttf"), size=FONTSIZE) | |
elif cls_name_lang == "ko": | |
# Korean | |
textFont = ImageFont.truetype(str(f"{ROOT_PATH}/fonts/malgun.ttf"), size=FONTSIZE) | |
for result in results.xyxyn: | |
for i in range(len(result)): | |
id = int(i) # instance ID | |
obj_cls_index = int(result[i][5]) # category index | |
obj_cls = model_cls_name_cp[obj_cls_index] # category | |
# ------------ border coordinates ------------ | |
x0 = float(result[i][:4].tolist()[0]) | |
y0 = float(result[i][:4].tolist()[1]) | |
x1 = float(result[i][:4].tolist()[2]) | |
y1 = float(result[i][:4].tolist()[3]) | |
# ------------ Actual coordinates of the border ------------ | |
x0 = int(img_size[0] * x0) | |
y0 = int(img_size[1] * y0) | |
x1 = int(img_size[0] * x1) | |
y1 = int(img_size[1] * y1) | |
conf = float(result[i][4]) # confidence | |
# fps = f"{(1000 / float(results.t[1])):.2f}" # FPS | |
det_img = pil_draw( | |
img, | |
f"{id}-{obj_cls}:{conf:.2f}", | |
textFont, | |
[x0, y0, x1, y1], | |
FONTSIZE, | |
opt, | |
) | |
# ----------add object size---------- | |
w_obj = x1 - x0 | |
h_obj = y1 - y0 | |
area_obj = w_obj * h_obj | |
area_obj_all.append(area_obj) | |
det_json = export_json(results, model, img.size)[0] # Detection information | |
det_json_format = json.dumps(det_json, sort_keys=False, indent=4, separators=(",", ":"), ensure_ascii=False) # JSON formatting | |
if "json" not in opt: | |
det_json = None | |
# -------pdf------- | |
report = "./Det_Report.pdf" | |
if "pdf" in opt: | |
pdf_generate(f"{det_json_format}", report, GYD_VERSION) | |
else: | |
report = None | |
# --------------object size compute-------------- | |
for i in range(len(area_obj_all)): | |
if (0 < area_obj_all[i] <= 32 ** 2): | |
s_obj = s_obj + 1 | |
elif (32 ** 2 < area_obj_all[i] <= 96 ** 2): | |
m_obj = m_obj + 1 | |
elif (area_obj_all[i] > 96 ** 2): | |
l_obj = l_obj + 1 | |
sml_obj_total = s_obj + m_obj + l_obj | |
objSize_dict = {obj_style[i]: [s_obj, m_obj, l_obj][i] / sml_obj_total for i in range(3)} | |
return det_img, det_json, report, dataframe, objSize_dict | |
def main(args): | |
gr.close_all() | |
global model, model_cls_name_cp, cls_name | |
slider_step = 0.05 # sliding step | |
source = args.source | |
img_tool = args.img_tool | |
nms_conf = args.nms_conf | |
nms_iou = args.nms_iou | |
model_name = args.model_name | |
model_cfg = args.model_cfg | |
cls_name = args.cls_name | |
device = args.device | |
inference_size = args.inference_size | |
max_detnum = args.max_detnum | |
is_fonts(f"{ROOT_PATH}/fonts") # Check font files | |
# model loading | |
model = model_loading(model_name, device) | |
model_names = yaml_csv(model_cfg, "model_names") # model names | |
model_cls_name = yaml_csv(cls_name, "model_cls_name") # class name | |
model_cls_name_cp = model_cls_name.copy() # class name | |
# ------------------- Input Components ------------------- | |
inputs_img = gr.inputs.Image(image_mode="RGB", source=source, tool=img_tool, type="pil", label="original image") | |
inputs_device = gr.inputs.Radio(choices=["cuda:0", "cpu"], default=device, label="device") | |
inputs_model = gr.inputs.Dropdown(choices=model_names, default=model_name, type="value", label="model") | |
inputs_size = gr.inputs.Radio(choices=[320, 640, 1280], default=inference_size, label="inference size") | |
input_conf = gr.inputs.Slider(0, 1, step=slider_step, default=nms_conf, label="confidence threshold") | |
inputs_iou = gr.inputs.Slider(0, 1, step=slider_step, default=nms_iou, label="IoU threshold") | |
inputs_maxnum = gr.inputs.Textbox(lines=1, placeholder="Maximum number of detections", default=max_detnum, label="Maximum number of detections") | |
inputs_clsName = gr.inputs.CheckboxGroup(choices=model_cls_name, default=model_cls_name, type="index", label="category") | |
inputs_opt = gr.inputs.CheckboxGroup(choices=["label", "pdf", "json"], | |
default=["label", "pdf"], | |
type="value", | |
label="operate") | |
# Input parameters | |
inputs = [ | |
inputs_img, # input image | |
inputs_device, # device | |
inputs_model, # model | |
inputs_size, # inference size | |
input_conf, # confidence threshold | |
inputs_iou, # IoU threshold | |
inputs_maxnum, # maximum number of detections | |
inputs_clsName, # category | |
inputs_opt, # detect operations | |
] | |
# Output parameters | |
outputs_img = gr.outputs.Image(type="pil", label="Detection image") | |
outputs_json = gr.outputs.JSON(label="Detection information") | |
outputs_pdf = gr.outputs.File(label="Download test report") | |
outputs_df = gr.outputs.Dataframe(max_rows=5, overflow_row_behaviour="paginate", type="pandas", label="List of detection information") | |
outputs_objSize = gr.outputs.Label(label="Object size ratio statistics") | |
outputs = [outputs_img, outputs_json, outputs_pdf, outputs_df, outputs_objSize] | |
# title | |
title = "Gradio YOLOv5 Det v0.3" | |
# describe | |
description = "<div align='center'>Customizable target detection model, easy to install, easy to use</div>" | |
# example image | |
examples = [ | |
[ | |
"./img_example/bus.jpg", | |
"cpu", | |
"yolov5s", | |
640, | |
0.6, | |
0.5, | |
10, | |
["person", "bus"], | |
["label", "pdf"],], | |
[ | |
"./img_example/giraffe.jpg", | |
"cpu", | |
"yolov5l", | |
320, | |
0.5, | |
0.45, | |
12, | |
["giraffe"], | |
["label", "pdf"],], | |
[ | |
"./img_example/zidane.jpg", | |
"cpu", | |
"yolov5m", | |
640, | |
0.25, | |
0.5, | |
15, | |
["person", "tie"], | |
["pdf", "json"],], | |
[ | |
"./img_example/Millenial-at-work.jpg", | |
"cpu", | |
"yolov5s6", | |
1280, | |
0.5, | |
0.5, | |
20, | |
["person", "chair", "cup", "laptop"], | |
["label", "pdf"],],] | |
# interface | |
gr.Interface( | |
fn=yolo_det, | |
inputs=inputs, | |
outputs=outputs, | |
title=title, | |
description=description, | |
article="", | |
examples=examples, | |
theme="seafoam", | |
flagging_dir="run", # output directory | |
).launch( | |
inbrowser=True, # Automatically open default browser | |
show_tips=True, # Automatically display the latest features of gradio | |
) | |
if __name__ == "__main__": | |
args = parse_args() | |
main(args) |