# OpenLenda / predictor.py
# Commit 37f5c2f ("Add app") by yutyan
import os
import time
from loguru import logger
import cv2
import torch
from yolox.data.data_augment import ValTransform
from yolox.data.datasets import COCO_CLASSES
from yolox.utils import postprocess, vis
class Predictor(object):
    """Single-image inference and visualisation wrapper around a detection model.

    The instance keeps the model together with its class names, thresholds,
    network input size and preprocessing transform, and exposes two steps:
    ``inference`` (image -> post-processed detections) and ``visual``
    (detections -> annotated image).
    """

    def __init__(
        self,
        model,
        cls_names=COCO_CLASSES,
        device="cpu",
        fp16=False,
        legacy=False,
    ):
        """Store the model and the default inference settings.

        Args:
            model: Callable model; invoked as ``model(img)`` on a batched
                float tensor.
            cls_names: Sequence of class names, forwarded to ``vis`` for
                labelling and used to derive the number of classes.
            device: ``"gpu"`` moves inputs to CUDA; any other value leaves
                them on the CPU.
            fp16: When true, inputs are cast to half precision on the GPU.
            legacy: Forwarded to ``ValTransform``.
        """
        self.model = model
        self.cls_names = cls_names
        # Derive the class count from the names actually supplied rather than
        # always from COCO, so custom label sets are post-processed with the
        # right number of class scores. Fall back to COCO only when no names
        # were given.
        if cls_names is not None:
            self.num_classes = len(cls_names)
        else:
            self.num_classes = len(COCO_CLASSES)
        self.confthre = 0.01
        self.nmsthre = 0.01
        self.test_size = (640, 640)
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform(legacy=legacy)

    def inference(self, img, confthre=None, nmsthre=None, test_size=None):
        """Run the model on one image and post-process the raw predictions.

        Args:
            img: Either a path to an image file (``str``) or an already
                loaded image array.
            confthre: Optional confidence threshold override.
            nmsthre: Optional NMS threshold override.
            test_size: Optional ``(height, width)`` network input size
                override.

        Returns:
            ``(outputs, img_info)`` where ``outputs`` is the value returned by
            ``postprocess`` and ``img_info`` is a dict with the keys ``id``,
            ``file_name``, ``height``, ``width``, ``raw_img`` and ``ratio``.

        Raises:
            ValueError: If ``img`` is a path that OpenCV cannot read.

        Note:
            Overrides are stored on the instance, so they stay in effect for
            later calls and for ``visual`` (which reads ``self.confthre``).
        """
        if confthre is not None:
            self.confthre = confthre
        if nmsthre is not None:
            self.nmsthre = nmsthre
        if test_size is not None:
            self.test_size = test_size

        img_info = {"id": 0}
        if isinstance(img, str):
            path = img
            img_info["file_name"] = os.path.basename(path)
            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals failure by returning None; fail here with
                # a clear message instead of an opaque error further down.
                raise ValueError(f"could not read image file: {path!r}")
        else:
            img_info["file_name"] = None

        # NOTE(review): this dumps the input image to ./test.png on every
        # call and looks like a leftover debugging aid. It is kept in case
        # something outside this file reads it; whether it originally sat
        # inside the ``else`` branch is unknown — confirm and consider
        # removing it.
        cv2.imwrite("test.png", img)

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        # Scale factor between the original image and the network input;
        # ``visual`` divides the predicted boxes by it.
        ratio = min(self.test_size[0] / height, self.test_size[1] / width)
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)  # add the batch dimension
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
            # NOTE(review): the half-precision cast is nested under the GPU
            # branch as in the upstream YOLOX demo — confirm this matches the
            # intended behaviour here.
            if self.fp16:
                img = img.half()  # to FP16

        with torch.no_grad():
            outputs = self.model(img)
            outputs = postprocess(
                outputs, self.num_classes, self.confthre, self.nmsthre
            )
        return outputs, img_info

    def visual(self, output, img_info):
        """Draw the detections for one image onto its original pixels.

        Args:
            output: Detections tensor for a single image, or ``None`` when
                there is nothing to draw. Columns 0-3 are read as the box,
                columns 4 and 5 are multiplied into the score and column 6 is
                the class index.
            img_info: The dict returned by ``inference`` for the same image.

        Returns:
            The image returned by ``vis``, or the untouched raw image when
            ``output`` is ``None``.
        """
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img

        output = output.cpu()
        # Undo the preprocessing resize. The division is done out of place:
        # ``cpu()`` returns the same tensor when it is already on the CPU, so
        # an in-place ``/=`` would modify the caller's detections and rescale
        # them again on every repeated call.
        bboxes = output[:, 0:4] / ratio
        cls = output[:, 6]
        scores = output[:, 4] * output[:, 5]

        vis_res = vis(img, bboxes, scores, cls, self.confthre, self.cls_names)
        return vis_res