FastSAM

Sleeping

App Files Files Community

FastSAM / ultralytics /nn /autoshape.py

An-619

Upload 161 files (#5)

a2b0f6f verified 8 months ago

raw

history blame

12.5 kB

	# Ultralytics YOLO 🚀, AGPL-3.0 license
	"""
	AutoShape input-robust model wrapper and the Detections results container it returns.
	"""

	from copy import copy
	from pathlib import Path

	import cv2
	import numpy as np
	import requests
	import torch
	import torch.nn as nn
	from PIL import Image, ImageOps
	from torch.cuda import amp

	from ultralytics.nn.autobackend import AutoBackend
	from ultralytics.yolo.data.augment import LetterBox
	from ultralytics.yolo.utils import LOGGER, colorstr
	from ultralytics.yolo.utils.files import increment_path
	from ultralytics.yolo.utils.ops import Profile, make_divisible, non_max_suppression, scale_boxes, xyxy2xywh
	from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
	from ultralytics.yolo.utils.torch_utils import copy_attr, smart_inference_mode


	class AutoShape(nn.Module):
	    """
	    YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs.

	    Wraps a model so that file paths, URLs, PIL images, numpy arrays or lists of these can be passed
	    directly to `forward()`, which runs preprocessing, inference and NMS and returns a `Detections` object.
	    The NMS behaviour is controlled by the class attributes below, which may be overridden per instance.
	    """
	    conf = 0.25  # NMS confidence threshold
	    iou = 0.45  # NMS IoU threshold
	    agnostic = False  # NMS class-agnostic
	    multi_label = False  # NMS multiple labels per box
	    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
	    max_det = 1000  # maximum number of detections per image
	    amp = False  # Automatic Mixed Precision (AMP) inference

	    def __init__(self, model, verbose=True):
	        """
	        Wrap `model` and copy its descriptive attributes onto this object.

	        Args:
	            model: Model to wrap; either an AutoBackend instance or a model whose `.model[-1]` is the
	                detection layer.
	            verbose (bool): Log a message when the wrapper is created.
	        """
	        super().__init__()
	        if verbose:
	            LOGGER.info('Adding AutoShape... ')
	        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
	        self.dmb = isinstance(model, AutoBackend)  # DetectMultiBackend() instance
	        self.pt = not self.dmb or model.pt  # PyTorch model
	        self.model = model.eval()
	        if self.pt:
	            m = self._detect_layer()  # Detect()
	            m.inplace = False  # Detect.inplace=False for safe multithread inference
	            m.export = True  # do not output loss values

	    def _detect_layer(self):
	        """Return the last layer of the wrapped PyTorch model (one level deeper when wrapped in AutoBackend)."""
	        return self.model.model.model[-1] if self.dmb else self.model.model[-1]

	    def _apply(self, fn):
	        """
	        Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers.

	        Args:
	            fn (callable): Tensor -> tensor conversion passed in by `nn.Module`.

	        Returns:
	            The module returned by `nn.Module._apply`.
	        """
	        self = super()._apply(fn)
	        if self.pt:
	            m = self._detect_layer()  # Detect()
	            m.stride = fn(m.stride)
	            # NOTE(review): `grid` / `anchor_grid` look like YOLOv5-style head attributes and may be absent on
	            # other detection heads - confirm against the Detect implementation in use. They are converted only
	            # when present so that device/dtype moves do not fail with AttributeError.
	            grid = getattr(m, 'grid', None)
	            if grid is not None:
	                m.grid = list(map(fn, grid))
	            anchor_grid = getattr(m, 'anchor_grid', None)
	            if isinstance(anchor_grid, list):
	                m.anchor_grid = list(map(fn, anchor_grid))
	        return self

	    @smart_inference_mode()
	    def forward(self, ims, size=640, augment=False, profile=False):
	        """
	        Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:

	            file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
	            URI:             = 'https://ultralytics.com/images/zidane.jpg'
	            OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
	            PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
	            numpy:           = np.zeros((640,1280,3))  # HWC
	            torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
	            multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

	        Args:
	            ims: A single input or a list/tuple of inputs in any of the forms above.
	            size (int | tuple): Inference size; an int is expanded to (size, size).
	            augment (bool): Forwarded to the wrapped model as `augment=`.
	            profile (bool): Accepted for interface compatibility; not read by this method.

	        Returns:
	            The raw output of the wrapped model when `ims` is a torch.Tensor (no preprocessing or NMS is
	            applied in that case), otherwise a `Detections` object.
	        """
	        dt = (Profile(), Profile(), Profile())  # timers: preprocess, inference, NMS
	        with dt[0]:
	            if isinstance(size, int):  # expand
	                size = (size, size)
	            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
	            autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
	            if isinstance(ims, torch.Tensor):  # torch
	                with amp.autocast(autocast):
	                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

	            # Preprocess
	            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
	            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
	            for i, im in enumerate(ims):
	                f = f'image{i}'  # fallback filename when the input carries none
	                if isinstance(im, (str, Path)):  # filename or uri
	                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
	                    im = np.asarray(ImageOps.exif_transpose(im))
	                elif isinstance(im, Image.Image):  # PIL Image
	                    im, f = np.asarray(ImageOps.exif_transpose(im)), getattr(im, 'filename', f) or f
	                files.append(Path(f).with_suffix('.jpg').name)
	                if im.shape[0] < 5:  # image in CHW
	                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
	                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
	                s = im.shape[:2]  # HWC
	                shape0.append(s)  # image shape
	                g = max(size) / max(s)  # gain
	                shape1.append([y * g for y in s])
	                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
	            # One common inference shape for the batch: the per-axis maximum of the scaled shapes, rounded with
	            # make_divisible against the model stride (PyTorch models only; otherwise `size` is used as given)
	            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size
	            x = [LetterBox(shape1, auto=False)(image=im)['img'] for im in ims]  # pad
	            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
	            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

	        with amp.autocast(autocast):
	            # Inference
	            with dt[1]:
	                y = self.model(x, augment=augment)  # forward

	            # Postprocess
	            with dt[2]:
	                y = non_max_suppression(y if self.dmb else y[0],
	                                        self.conf,
	                                        self.iou,
	                                        self.classes,
	                                        self.agnostic,
	                                        self.multi_label,
	                                        max_det=self.max_det)  # NMS
	                for i in range(n):
	                    # Return value is not used: scale_boxes is relied on to rescale the box columns of y[i]
	                    # from the inference shape back to the original image shape
	                    scale_boxes(shape1, y[i][:, :4], shape0[i])

	            return Detections(ims, y, files, dt, self.names, x.shape)


	class Detections:
	    """
	    YOLOv8 detections class for inference results.

	    Holds the input images, the per-image prediction tensors and several derived box formats
	    (`xyxy`, `xywh`, `xyxyn`, `xywhn`), plus helpers to print, show, save, crop, render and export them.
	    """

	    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
	        """
	        Initialize object attributes for YOLO detection results.

	        Args:
	            ims (list): Images as numpy arrays (indexed as height, width, ...).
	            pred (list): One tensor per image with columns (xyxy, conf, cls).
	            files (list): Image filenames, parallel to `ims`.
	            times (sequence): Three timings, each either an object exposing a `.t` attribute or a plain
	                number. Values are scaled by 1E3 and reported as ms, so they are presumably seconds -
	                TODO confirm against the profiler used by the caller.
	            names: Mapping (or sequence) from class index to class name.
	            shape: Inference BCHW shape; `None` is stored as an empty tuple.
	        """
	        super().__init__()
	        d = pred[0].device if len(pred) else None  # device (None -> torch default when there are no predictions)
	        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
	        self.ims = ims  # list of images as numpy arrays
	        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
	        self.names = names  # class names
	        self.files = files  # image filenames
	        self.times = times  # profiling times
	        self.xyxy = pred  # xyxy pixels
	        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
	        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
	        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
	        self.n = len(self.pred)  # number of images (batch size)
	        # Per-image timings in ms. Plain numbers (such as the default zeros) are accepted alongside profiler
	        # objects, and an empty batch divides by 1 instead of 0.
	        per_image = max(self.n, 1)
	        self.t = tuple(getattr(x, 't', x) / per_image * 1E3 for x in times)  # timestamps (ms)
	        self.s = tuple(shape) if shape is not None else ()  # inference BCHW shape

	    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
	        """
	        Build the results summary and optionally show, save, crop or render the annotated images.

	        Args:
	            pprint (bool): Return the textual summary (takes precedence over `crop` for the return value).
	            show (bool): Display each image.
	            save (bool): Save each image (or each crop when `crop` is set) under `save_dir`.
	            crop (bool): Collect one crop record per detection instead of drawing boxes.
	            render (bool): Write the annotated images back into `self.ims`.
	            labels (bool): Draw the class/confidence label next to each box.
	            save_dir (Path): Output directory used when saving.

	        Returns:
	            str when `pprint` is set, a list of crop dicts when `crop` is set, otherwise None.
	        """
	        s, crops = '', []
	        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
	            s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
	            if pred.shape[0]:
	                for c in pred[:, -1].unique():
	                    n = int((pred[:, -1] == c).sum())  # detections per class
	                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
	                s = s.rstrip(', ')
	                if show or save or render or crop:
	                    annotator = Annotator(im, example=str(self.names))
	                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
	                        label = f'{self.names[int(cls)]} {conf:.2f}'
	                        if crop:
	                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
	                            crops.append({
	                                'box': box,
	                                'conf': conf,
	                                'cls': cls,
	                                'label': label,
	                                'im': save_one_box(box, im, file=file, save=save)})
	                        else:  # all others
	                            annotator.box_label(box, label if labels else '', color=colors(cls))
	                    im = annotator.im
	            else:
	                s += '(no detections)'

	            # The PIL conversion is only needed by the three consumers below, so print-only and crop-only
	            # calls skip it
	            if show or save or render:
	                im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
	                if show:
	                    im.show(self.files[i])  # show
	                if save:
	                    f = self.files[i]
	                    im.save(save_dir / f)  # save
	                    if i == self.n - 1:
	                        LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
	                if render:
	                    self.ims[i] = np.asarray(im)
	        if pprint:
	            s = s.lstrip('\n')
	            # %-format only the constant template: `s` contains class names, and a '%' in one of them must not
	            # be interpreted as a conversion specifier
	            speed = 'Speed: %.1fms preprocess, %.1fms inference, %.1fms NMS per image' % self.t
	            return f'{s}\n{speed} at shape {self.s}'
	        if crop:
	            if save:
	                LOGGER.info(f'Saved results to {save_dir}\n')
	            return crops

	    def show(self, labels=True):
	        """Displays YOLO results with detected bounding boxes."""
	        self._run(show=True, labels=labels)  # show results

	    def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
	        """Save detection results with optional labels to specified directory."""
	        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
	        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

	    def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
	        """Crops images into detections and saves them if 'save' is True."""
	        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
	        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

	    def render(self, labels=True):
	        """Renders detected objects and returns images."""
	        self._run(render=True, labels=labels)  # render results
	        return self.ims

	    def pandas(self):
	        """Return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])."""
	        import pandas
	        new = copy(self)  # return copy
	        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
	        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
	        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
	            # Each row keeps its first five values and replaces the class column with (int class, class name)
	            tables = [[row[:5] + [int(row[5]), self.names[int(row[5])]] for row in det.tolist()]
	                      for det in getattr(self, k)]
	            setattr(new, k, [pandas.DataFrame(rows, columns=c) for rows in tables])
	        return new

	    def tolist(self):
	        """
	        Return a list of Detections objects, i.e. 'for result in results.tolist():'.

	        Each element wraps a single image; its attributes remain one-element lists.
	        """
	        return [
	            Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s)
	            for i in range(self.n)]

	    def print(self):
	        """Print the results of the `self._run()` function."""
	        LOGGER.info(self.__str__())

	    def __len__(self):  # override len(results)
	        """Return the number of images (batch size)."""
	        return self.n

	    def __str__(self):  # override print(results)
	        """Return the textual results summary produced by `_run(pprint=True)`."""
	        return self._run(pprint=True)  # print results

	    def __repr__(self):
	        """Returns a printable representation of the object."""
	        return f'YOLOv8 {self.__class__} instance\n' + self.__str__()