import sys
import pathlib

# Make the script's own directory importable before loading the local utils module.
CURRENT_DIR = pathlib.Path(__file__).parent
sys.path.append(str(CURRENT_DIR))

import torch
import torch.nn as nn
import onnxruntime
import numpy as np
import argparse
from utils import (
    LoadImages,
    non_max_suppression,
    plot_images,
    output_to_target,
)


def preprocess(img):
    """Convert a uint8 image array to a float tensor scaled to [0, 1]."""
    img = torch.from_numpy(img)
    img = img.float()
    img /= 255  # scale pixel values from 0-255 to 0.0-1.0
    return img
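# LoadImages yields images as CHW uint8 numpy arrays (the main loop below
# permutes NCHW -> NHWC for the model), so preprocess() only converts dtype
# and scale.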

class DFL(nn.Module):
    """Integral decoder for Distribution Focal Loss (DFL): collapses each box
    side's c1-bin distribution into its expected value with a fixed 1x1 conv."""

    def __init__(self, c1=16):
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        x = torch.arange(c1, dtype=torch.float)
        # The conv weights are the bin indices 0..c1-1, so applying the conv to
        # a softmax distribution computes its expectation.
        self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
        self.c1 = c1

    def forward(self, x):
        b, c, a = x.shape  # batch, channels (4 * c1), anchors
        return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(
            b, 4, a
        )
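# Illustrative check (not part of the pipeline): zero logits give a uniform
# softmax over the 16 bins, so DFL(16)(torch.zeros(1, 64, 10)) returns 7.5
# everywhere -- the mean bin index, sum(range(16)) / 16.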

def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Transform distance (ltrb) to box (xywh or xyxy)."""
    lt, rb = torch.split(distance, 2, dim)
    x1y1 = anchor_points - lt
    x2y2 = anchor_points + rb
    if xywh:
        c_xy = (x1y1 + x2y2) / 2
        wh = x2y2 - x1y1
        return torch.cat((c_xy, wh), dim)
    return torch.cat((x1y1, x2y2), dim)
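# Worked example: for an anchor at (10, 10) with ltrb distances (2, 3, 4, 5),
# the corners are x1y1 = (8, 7) and x2y2 = (14, 15), giving xywh = (11, 11, 6, 8).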

def post_process(x):
    dfl = DFL(16)
    # Anchor centers and per-anchor strides precomputed and saved alongside the model.
    anchors = torch.tensor(
        np.load(
            "./anchors.npy",
            allow_pickle=True,
        )
    )
    strides = torch.tensor(
        np.load(
            "./strides.npy",
            allow_pickle=True,
        )
    )
    # Each detection head emits 144 channels: 64 box channels (4 sides x 16 DFL
    # bins) followed by 80 class logits.
    box, cls = torch.cat([xi.view(x[0].shape[0], 144, -1) for xi in x], 2).split(
        (16 * 4, 80), 1
    )
    dbox = dist2bbox(dfl(box), anchors.unsqueeze(0), xywh=True, dim=1) * strides
    y = torch.cat((dbox, cls.sigmoid()), 1)
    return y, x
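# The decoded tensor y has shape (batch, 84, num_anchors): 4 xywh box values in
# input-pixel units followed by 80 sigmoid class scores -- the layout the
# bundled non_max_suppression expects.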

def make_parser():
    parser = argparse.ArgumentParser("onnxruntime inference sample")
    parser.add_argument(
        "-m",
        "--onnx_model",
        type=str,
        default="./yolov8m.onnx",
        help="input your onnx model.",
    )
    parser.add_argument(
        "-i",
        "--image_path",
        type=str,
        default="./demo.jpg",
        help="path to your input image.",
    )
    parser.add_argument(
        "-o",
        "--output_path",
        type=str,
        default="./demo_infer.jpg",
        help="path to your output image.",
    )
    parser.add_argument("--ipu", action="store_true", help="flag for ryzen ai")
    parser.add_argument(
        "--provider_config", default="", type=str, help="provider config for ryzen ai"
    )
    return parser
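# Example invocations (the script and config filenames are illustrative; the
# model and image paths are the defaults above):
#   python onnx_inference.py -m ./yolov8m.onnx -i ./demo.jpg -o ./demo_infer.jpg
#   python onnx_inference.py --ipu --provider_config vaip_config.json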

# The 80 COCO class names, indexed by class id.
classnames = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
              'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
              'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
              'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
              'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
              'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
              'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
              'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
              'hair drier', 'toothbrush']
names = {k: classnames[k] for k in range(80)}
imgsz = [640, 640]

if __name__ == '__main__':
    args = make_parser().parse_args()
    source = args.image_path
    dataset = LoadImages(
        source, imgsz=imgsz, stride=32, auto=False, transforms=None, vid_stride=1
    )
    onnx_weight = args.onnx_model
    if args.ipu:
        # Run on the Ryzen AI IPU through the Vitis AI execution provider.
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
        onnx_model = onnxruntime.InferenceSession(
            onnx_weight, providers=providers, provider_options=provider_options
        )
    else:
        onnx_model = onnxruntime.InferenceSession(onnx_weight)
    for batch in dataset:
        path, im, im0s, vid_cap, s = batch
        im = preprocess(im)
        if len(im.shape) == 3:
            im = im[None]  # add a batch dimension

        # The exported model takes NHWC input, so permute from NCHW before the
        # run, then permute the NHWC head outputs back for post-processing.
        outputs = onnx_model.run(
            None,
            {onnx_model.get_inputs()[0].name: im.permute(0, 2, 3, 1).cpu().numpy()},
        )
        outputs = [torch.tensor(item).permute(0, 3, 1, 2) for item in outputs]
        preds = post_process(outputs)
        preds = non_max_suppression(
            preds, 0.25, 0.7, agnostic=False, max_det=300, classes=None
        )
        plot_images(
            im,
            *output_to_target(preds, max_det=15),
            source,
            fname=args.output_path,
            names=names,
        )