import cv2
import stow
import typing
import numpy as np
import onnxruntime as ort


class FaceNet:
    """FaceNet class object, which can be used for simplified face recognition"""
    def __init__(
        self,
        detector: object,
        onnx_model_path: str = "models/faceNet.onnx",
        anchors: typing.Union[str, dict] = 'faces',
        force_cpu: bool = False,
        threshold: float = 0.5,
        color: tuple = (255, 255, 255),
        thickness: int = 2,
        ) -> None:
        """Object for face recognition

        Params:
            detector: (object) - detector object to detect faces in image
            onnx_model_path: (str) - path to onnx model
            anchors: (str or dict) - path to directory with faces or dictionary with anchor names as keys and anchor encodings as values
            force_cpu: (bool) - if True, onnx model will be run on CPU
            threshold: (float) - threshold for face recognition
            color: (tuple) - color of bounding box and text
            thickness: (int) - thickness of bounding box and text
        """
        if not stow.exists(onnx_model_path):
            raise Exception(f"Model doesn't exist at {onnx_model_path}")

        self.detector = detector
        self.threshold = threshold
        self.color = color
        self.thickness = thickness

        # Prefer the CUDA provider when a GPU is available, unless the caller forces CPU
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        providers = providers if ort.get_device() == "GPU" and not force_cpu else providers[::-1]
        self.ort_sess = ort.InferenceSession(onnx_model_path, providers=providers)

        # Spatial input size (height, width), read from the model's input metadata
        # via the public get_inputs() API instead of the private _inputs_meta attribute
        self.input_shape = self.ort_sess.get_inputs()[0].shape[1:3]

        self.anchors = self.load_anchors(anchors) if isinstance(anchors, str) else anchors

    def normalize(self, img: np.ndarray) -> np.ndarray:
        """Normalize image

        Args:
            img: (np.ndarray) - image to be normalized

        Returns:
            img: (np.ndarray) - normalized image
        """
        mean, std = img.mean(), img.std()
        return (img - mean) / std

    def l2_normalize(self, x: np.ndarray, axis: int = -1, epsilon: float = 1e-10) -> np.ndarray:
        """l2 normalization function

        Args:
            x: (np.ndarray) - input array
            axis: (int) - axis to normalize
            epsilon: (float) - epsilon to avoid division by zero

        Returns:
            x: (np.ndarray) - normalized array
        """
        output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
        return output

    def detect_save_faces(self, image: np.ndarray, output_dir: str = "faces") -> bool:
        """Detect faces in given image and save them to output_dir

        Args:
            image: (np.ndarray) - image to be processed
            output_dir: (str) - directory where faces will be saved

        Returns:
            bool: (bool) - True if faces were detected and saved
        """
        face_crops = [image[t:b, l:r] for t, l, b, r in self.detector(image, return_tlbr=True)]

        if not face_crops:
            return False

        stow.mkdir(output_dir)

        for index, crop in enumerate(face_crops):
            output_path = stow.join(output_dir, f"face_{str(index)}.png")
            cv2.imwrite(output_path, crop)
            print("Crop saved to:", output_path)

        # Rebuild the anchor encodings from the freshly saved crops
        self.anchors = self.load_anchors(output_dir)

        return True

    def load_anchors(self, faces_path: str) -> dict:
        """Generate anchors for given faces path

        Args:
            faces_path: (str) - path to directory with faces

        Returns:
            anchors: (dict) - dictionary with anchor names as keys and anchor encodings as values
        """
        anchors = {}
        if not stow.exists(faces_path):
            return {}

        for face_path in stow.ls(faces_path):
            anchors[stow.basename(face_path)] = self.encode(cv2.imread(face_path.path))

        return anchors

    def encode(self, face_image: np.ndarray) -> np.ndarray:
        """Encode face image with FaceNet model

        Args:
            face_image: (np.ndarray) - face image to be encoded

        Returns:
            face_encoding: (np.ndarray) - face encoding
        """
        face = self.normalize(face_image)
        # self.input_shape is (height, width) while cv2.resize expects (width, height);
        # FaceNet uses a square input (e.g. 160x160), so the ordering is harmless here
        face = cv2.resize(face, tuple(self.input_shape)).astype(np.float32)

        encode = self.ort_sess.run(None, {self.ort_sess.get_inputs()[0].name: np.expand_dims(face, axis=0)})[0][0]
        normalized_encode = self.l2_normalize(encode)

        return normalized_encode

    def cosine_distance(self, a: np.ndarray, b: typing.Union[np.ndarray, list]) -> np.ndarray:
        """Cosine similarity between vector a and vectors b (higher means a closer match)

        Args:
            a: (np.ndarray) - first vector
            b: (np.ndarray or list) - second vector or list of vectors

        Returns:
            similarity: (np.ndarray) - cosine similarity scores
        """
        if isinstance(a, list):
            a = np.array(a)
        if isinstance(b, list):
            b = np.array(b)

        # Use per-vector norms (axis=-1) so each anchor is normalized individually;
        # a single norm over the whole matrix would wrongly scale scores by sqrt(N)
        return np.dot(a, b.T) / (np.linalg.norm(a) * np.linalg.norm(b, axis=-1))

    def draw(self, image: np.ndarray, face_crops: dict) -> np.ndarray:
        """Draw face crops on image

        Args:
            image: (np.ndarray) - image to be drawn on
            face_crops: (dict) - dictionary of detected faces, each value holding a name and tlbr bounding box

        Returns:
            image: (np.ndarray) - image with drawn face crops
        """
        for value in face_crops.values():
            t, l, b, r = value["tlbr"]
            cv2.rectangle(image, (l, t), (r, b), self.color, self.thickness)
            # stow.name strips the file extension from the anchor filename for display
            cv2.putText(image, stow.name(value['name']), (l, t - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, self.color, self.thickness)

        return image

    def __call__(self, frame: np.ndarray) -> np.ndarray:
        """Face recognition pipeline

        Args:
            frame: (np.ndarray) - image to be processed

        Returns:
            frame: (np.ndarray) - image with drawn face recognition results
        """
        face_crops = {
            index: {"name": "Unknown", "tlbr": tlbr}
            for index, tlbr in enumerate(self.detector(frame, return_tlbr=True))
        }
        for key, value in face_crops.items():
            t, l, b, r = value["tlbr"]
            face_encoding = self.encode(frame[t:b, l:r])
            if not self.anchors:
                continue  # no anchor encodings loaded, nothing to match against
            distances = self.cosine_distance(face_encoding, list(self.anchors.values()))
            if np.max(distances) > self.threshold:
                face_crops[key]["name"] = list(self.anchors.keys())[np.argmax(distances)]

        frame = self.draw(frame, face_crops)

        return frame
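

# --- Usage sketch (illustrative, not part of the original class) -------------
# A minimal example of wiring FaceNet into a webcam loop. The detector import
# below is an assumption: any object works as long as calling it as
# detector(image, return_tlbr=True) yields (top, left, bottom, right) boxes,
# matching how FaceNet invokes it above.
if __name__ == "__main__":
    from faceDetection import MPFaceDetection  # hypothetical module/class, swap in your own detector

    facenet = FaceNet(
        detector=MPFaceDetection(),
        onnx_model_path="models/faceNet.onnx",
        anchors="faces",  # directory with one reference image per known person
    )

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # Detect faces, encode them, match against the anchors, and draw results
        frame = facenet(frame)

        cv2.imshow("Face recognition", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()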