Spaces:
Build error
Build error
File size: 6,626 Bytes
a9640c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
import cv2
import stow
import typing
import numpy as np
import onnxruntime as ort
class FaceNet:
    """Simplified face recognition built on a FaceNet ONNX model.

    A detector callable locates faces in an image, every face crop is embedded
    with the ONNX model, and the embedding is compared against a set of named
    anchor embeddings using cosine similarity.
    """

    def __init__(
        self,
        detector: object,
        onnx_model_path: str = "models/faceNet.onnx",
        anchors: typing.Union[str, dict] = 'faces',
        force_cpu: bool = False,
        threshold: float = 0.5,
        color: tuple = (255, 255, 255),
        thickness: int = 2,
    ) -> None:
        """Create the recognizer and load the ONNX model.

        Params:
            detector: (object) - callable invoked as ``detector(image, return_tlbr=True)``
                that yields ``(top, left, bottom, right)`` boxes
            onnx_model_path: (str) - path to onnx model
            anchors: (str or dict) - path to directory with faces or dictionary with
                anchor names as keys and anchor encodings as values
            force_cpu: (bool) - if True, the CPU execution provider is preferred
            threshold: (float) - minimum cosine similarity for a face to be
                labelled with an anchor name
            color: (tuple) - color of bounding box and text (passed to OpenCV as is)
            thickness: (int) - thickness of bounding box and text

        Raises:
            FileNotFoundError: if ``onnx_model_path`` does not exist.
        """
        if not stow.exists(onnx_model_path):
            # FileNotFoundError is a subclass of Exception, so existing
            # ``except Exception`` handlers keep working.
            raise FileNotFoundError(f"Model doesn't exists in {onnx_model_path}")

        self.detector = detector
        self.threshold = threshold
        self.color = color
        self.thickness = thickness

        # Provider order expresses preference: CUDA first only when a GPU build
        # is in use and the caller did not force CPU, otherwise CPU first.
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        if force_cpu or ort.get_device() != "GPU":
            providers = providers[::-1]

        self.ort_sess = ort.InferenceSession(onnx_model_path, providers=providers)
        # Dimensions 1 and 2 of the first model input are used as the resize
        # target; ``encode`` feeds height x width x channel images, so these
        # are taken to be (height, width).
        self.input_shape = self.ort_sess.get_inputs()[0].shape[1:3]
        self.anchors = self.load_anchors(anchors) if isinstance(anchors, str) else anchors

    def normalize(self, img: np.ndarray) -> np.ndarray:
        """Standardize an image to zero mean and unit variance.

        Args:
            img: (np.ndarray) - image to be normalized

        Returns:
            img: (np.ndarray) - normalized image; a constant image (zero
                standard deviation) is returned mean-centred instead of
                being divided by zero
        """
        mean, std = img.mean(), img.std()
        if std == 0:
            # Every pixel equals the mean: dividing would only produce NaNs.
            return img - mean
        return (img - mean) / std

    def l2_normalize(self, x: np.ndarray, axis: int = -1, epsilon: float = 1e-10) -> np.ndarray:
        """Scale ``x`` to unit L2 norm along ``axis``.

        Args:
            x: (np.ndarray) - input array
            axis: (int) - axis to normalize
            epsilon: (float) - lower bound for the squared norm, avoids division by zero

        Returns:
            x: (np.ndarray) - normalized array
        """
        squared_norm = np.sum(np.square(x), axis=axis, keepdims=True)
        return x / np.sqrt(np.maximum(squared_norm, epsilon))

    def detect_save_faces(self, image: np.ndarray, output_dir: str = "faces") -> typing.Union[bool, np.ndarray]:
        """Detect faces in the given image and return the first face crop.

        NOTE: writing the crops to ``output_dir`` and reloading the anchors is
        currently disabled, so ``output_dir`` is accepted but unused.

        Args:
            image: (np.ndarray) - image to be processed
            output_dir: (str) - directory where faces would be saved (unused)

        Returns:
            False if no face was detected, otherwise the crop (np.ndarray) of
            the first detected face
        """
        face_crops = [
            image[top:bottom, left:right]
            for top, left, bottom, right in self.detector(image, return_tlbr=True)
        ]
        if not face_crops:
            return False
        return face_crops[0]

    def load_anchors(self, faces_path: str) -> dict:
        """Generate anchors for every readable image in the given directory.

        Args:
            faces_path: (str) - path to directory with faces

        Returns:
            anchors: (dict) - dictionary with file names as keys and anchor
                encodings as values; empty if ``faces_path`` does not exist
        """
        if not stow.exists(faces_path):
            return {}

        anchors = {}
        for face_path in stow.ls(faces_path):
            face_image = cv2.imread(face_path.path)
            if face_image is None:
                # cv2.imread returns None for files it cannot decode.
                continue
            anchors[stow.basename(face_path)] = self.encode(face_image)
        return anchors

    def encode(self, face_image: np.ndarray) -> np.ndarray:
        """Encode a face image with the FaceNet model.

        Args:
            face_image: (np.ndarray) - face image to be encoded

        Returns:
            face_encoding: (np.ndarray) - L2-normalized face encoding
        """
        face = self.normalize(face_image)
        # cv2.resize takes the target size as (width, height).
        height, width = self.input_shape
        face = cv2.resize(face, (width, height)).astype(np.float32)

        input_name = self.ort_sess.get_inputs()[0].name
        batch = np.expand_dims(face, axis=0)
        # First output, first (and only) item of the batch.
        encoding = self.ort_sess.run(None, {input_name: batch})[0][0]
        return self.l2_normalize(encoding)

    def cosine_distance(self, a: np.ndarray, b: typing.Union[np.ndarray, list]) -> np.ndarray:
        """Cosine similarity between vector(s) a and vector(s) b.

        Despite the method name, higher values mean more similar vectors
        (1.0 for identical directions).

        Args:
            a: (np.ndarray or list) - vector, or array with one vector per row
            b: (np.ndarray or list) - vector, or list/array with one vector per row

        Returns:
            similarity: (np.ndarray) - ``np.dot(a, b.T)`` with every entry divided
                by the norms of the two vectors it compares; an empty array
                if ``b`` holds no vectors
        """
        a = np.asarray(a)
        b = np.asarray(b)
        if b.size == 0:
            return np.empty(0)

        # Norms are taken per vector (last axis). A single norm over a whole
        # matrix of anchors would scale every score by the number of anchors.
        norms = np.multiply.outer(np.linalg.norm(a, axis=-1), np.linalg.norm(b, axis=-1))
        return np.dot(a, b.T) / np.maximum(norms, 1e-10)

    def draw(self, image: np.ndarray, face_crops: dict) -> np.ndarray:
        """Draw bounding boxes and names of the given faces on the image.

        Args:
            image: (np.ndarray) - image to be drawn on (modified in place)
            face_crops: (dict) - dictionary whose values are dictionaries with
                a "name" and a "tlbr" (top, left, bottom, right) entry

        Returns:
            image: (np.ndarray) - image with drawn face crops
        """
        for face in face_crops.values():
            top, left, bottom, right = face["tlbr"]
            cv2.rectangle(image, (left, top), (right, bottom), self.color, self.thickness)
            cv2.putText(
                image,
                stow.name(face['name']),
                (left, top - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                self.color,
                self.thickness,
            )
        return image

    def __call__(self, frame: np.ndarray) -> np.ndarray:
        """Face recognition pipeline.

        Args:
            frame: (np.ndarray) - image to be processed

        Returns:
            frame: (np.ndarray) - image with drawn face recognition results;
                faces without a match above the threshold are labelled "Unknown"
        """
        face_crops = {
            index: {"name": "Unknown", "tlbr": tlbr}
            for index, tlbr in enumerate(self.detector(frame, return_tlbr=True))
        }

        anchor_names = list(self.anchors.keys())
        if anchor_names:
            # Build the anchor matrix once instead of once per detected face.
            anchor_encodings = np.asarray(list(self.anchors.values()))
            for face in face_crops.values():
                top, left, bottom, right = face["tlbr"]
                crop = frame[top:bottom, left:right]
                if crop.size == 0:
                    # Degenerate box: nothing to encode, keep "Unknown".
                    continue
                similarities = self.cosine_distance(self.encode(crop), anchor_encodings)
                best = int(np.argmax(similarities))
                if similarities[best] > self.threshold:
                    face["name"] = anchor_names[best]

        return self.draw(frame, face_crops)