Martlgap commited on
Commit
dc70a00
·
1 Parent(s): c59c6ab

testing minimum version

Browse files
app.py CHANGED
@@ -2,291 +2,414 @@ import streamlit as st
2
  import time
3
  from typing import List
4
  from streamlit_webrtc import webrtc_streamer, WebRtcMode
5
- import logging
6
  import av
7
- import queue
8
- from streamlit_toggle import st_toggle_switch
9
- import pandas as pd
10
- from tools.nametypes import Stats, Detection, Identity, Match
11
- from tools.utils import get_ice_servers, rgb, format_dflist
12
- from tools.face_detection import FaceDetection
13
- from tools.face_recognition import FaceRecognition
14
- from tools.annotation import Annotation
15
- from tools.gallery import init_gallery
16
- from tools.pca import pca
17
 
18
 
19
- # Set logging level to error (To avoid getting spammed by queue warnings etc.)
20
- logger = logging.getLogger(__name__)
21
- logging.basicConfig(level=logging.ERROR)
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # Set page layout for streamlit to wide
25
- st.set_page_config(layout="wide", page_title="FaceID App Demo", page_icon=":sunglasses:")
26
- with st.sidebar:
27
- st.markdown("# Settings")
28
- face_rec_on = st_toggle_switch(
29
- "Live Face Recognition",
30
- key="activate_face_rec",
31
- default_value=True,
32
- active_color=rgb(255, 75, 75),
33
- track_color=rgb(50, 50, 50),
34
- label_after=True,
35
- )
36
 
37
- with st.expander("Advanced Settings", expanded=False):
38
- st.markdown("## Webcam & Stream")
39
- resolution = st.selectbox(
40
- "Webcam Resolution",
41
- [(1920, 1080), (1280, 720), (640, 360)],
42
- index=2,
43
- )
44
- st.markdown("Note: To change the resolution, you have to restart the stream.")
45
 
46
- ice_server = st.selectbox("ICE Server", ["twilio", "metered"], index=1)
47
- st.markdown(
48
- "Note: metered is a free server with limited bandwidth, and can take a while to connect. Twilio is a paid service and is payed by me, so please don't abuse it."
49
- )
50
- st.markdown("---")
51
- st.markdown("## Face Detection")
52
- detection_min_face_size = st.slider("Min Face Size", min_value=5, max_value=120, value=40)
53
- detection_scale_factor = st.slider("Scale Factor", min_value=0.1, max_value=1.0, value=0.7)
54
- detection_confidence = st.slider("Min Detection Confidence", min_value=0.5, max_value=1.0, value=0.9)
55
- st.markdown("---")
56
- st.markdown("## Face Recognition")
57
- similarity_threshold = st.slider("Similarity Threshold", min_value=0.0, max_value=2.0, value=0.67)
58
- st.markdown(
59
- "This sets a maximum distance for the cosine similarity between the embeddings of the detected face and the gallery images. If the distance is below the threshold, the face is recognized as the gallery image with the lowest distance. If the distance is above the threshold, the face is not recognized."
60
- )
61
- model_name = st.selectbox("Model", ["mobileNet", "resNet"], index=0)
62
- st.markdown(
63
- "Note: The mobileNet model is smaller and faster, but less accurate. The resNet50 model is bigger and slower, but more accurate."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- st.markdown("# Face Gallery")
67
- files = st.sidebar.file_uploader(
68
- "Upload images to gallery",
69
- type=["png", "jpg", "jpeg"],
70
- accept_multiple_files=True,
71
- label_visibility="collapsed",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
73
 
74
- with st.expander("Uploaded Images", expanded=True):
75
- if files:
76
- st.image(files, width=112, caption=files)
77
- else:
78
- st.info("No images uploaded yet.")
 
 
 
 
 
 
79
 
 
 
 
 
 
 
 
 
80
 
81
- gallery = init_gallery(
82
- files,
83
- min_detections_conf=detection_confidence,
84
- min_similarity=similarity_threshold,
85
- model_name=model_name,
86
- )
 
 
 
 
 
 
 
87
 
88
- face_detector = FaceDetection(
89
- min_detections_conf=detection_confidence,
90
- min_face_size=detection_min_face_size,
91
- scale_factor=detection_scale_factor,
92
- )
93
- face_recognizer = FaceRecognition(model_name=model_name, min_similarity=similarity_threshold)
94
- annotator = Annotation()
 
 
 
 
 
 
95
 
96
- transfer_queue: "queue.Queue[Stats, List[Detection], List[Identity], List[Match]]" = queue.Queue()
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
100
- # Initialize detections
101
- detections, identities, matches = [], [], []
 
 
 
 
 
 
 
 
 
 
102
 
103
- # Initialize stats
104
- stats = Stats()
105
 
106
- # Start timer for FPS calculation
107
- frame_start = time.time()
108
 
 
109
  # Convert frame to numpy array
110
  frame = frame.to_ndarray(format="rgb24")
111
 
112
- # Get frame resolution and add to stats
113
- resolution = frame.shape
114
- stats = stats._replace(resolution=resolution)
115
-
116
- if face_rec_on:
117
- # Run face detection
118
- start = time.time()
119
- frame, detections = face_detector(frame)
120
- stats = stats._replace(num_faces=len(detections) if detections else 0)
121
- stats = stats._replace(detection=(time.time() - start) * 1000)
122
 
123
- # Run face recognition
124
- start = time.time()
125
- identities = face_recognizer(frame, detections)
126
- stats = stats._replace(recognition=(time.time() - start) * 1000)
127
 
128
- # Do matching
129
- start = time.time()
130
- matches = face_recognizer.find_matches(identities, gallery)
131
- stats = stats._replace(matching=(time.time() - start) * 1000)
132
 
133
- # Draw annotations
134
- start = time.time()
135
- frame = annotator(frame, detections, identities, matches, gallery)
136
- stats = stats._replace(annotation=(time.time() - start) * 1000)
137
 
138
  # Convert frame back to av.VideoFrame
139
  frame = av.VideoFrame.from_ndarray(frame, format="rgb24")
140
 
141
- # Calculate FPS and add to stats
142
- stats = stats._replace(fps=1 / (time.time() - frame_start))
143
 
144
- # Send data to other thread
145
- transfer_queue.put_nowait([stats, detections, identities, matches])
146
 
147
- return frame
 
 
 
 
 
 
 
 
148
 
 
 
 
 
 
149
 
150
- # Streamlit app
151
- st.title("Live Webcam Face Recognition")
152
 
153
- st.markdown("**Stream Stats**")
154
- disp_stats = st.info("No streaming statistics yet, please start the stream.")
155
-
156
- ctx = webrtc_streamer(
157
- key="FaceIDAppDemo",
158
- mode=WebRtcMode.SENDRECV,
159
- rtc_configuration={"iceServers": get_ice_servers(name=ice_server)},
160
- video_frame_callback=video_frame_callback,
161
- media_stream_constraints={
162
- "video": {
163
- "width": {
164
- "min": resolution[0],
165
- "ideal": resolution[0],
166
- "max": resolution[0],
167
- },
168
- "height": {
169
- "min": resolution[1],
170
- "ideal": resolution[1],
171
- "max": resolution[1],
172
- },
173
- },
174
- "audio": False,
175
- },
176
- async_processing=True,
177
- )
178
 
179
- tab_recognition, tab_metrics, tab_pca = st.tabs(["Recognized Identities", "Recognition Metrics", "Live PCAs"])
180
-
181
-
182
- with tab_recognition:
183
- # Display Gallery and Recognized Identities
184
- col1, col2 = st.columns(2)
185
- col1.markdown("**Gallery Identities**")
186
- disp_identities_gal = col1.info("No gallery images uploaded yet ...")
187
- col2.markdown("**Recognized Identities**")
188
- disp_identities_rec = col2.info("No recognized identities yet ...")
189
-
190
- with tab_metrics:
191
- # Display Detections and Identities
192
- st.markdown("**Detection Metrics**")
193
- disp_detection_metrics = st.info("No detected faces yet ...")
194
-
195
- # Display Recognition Metrics
196
- st.markdown("**Recognition Metrics**")
197
- disp_recognition_metrics = st.info("No recognized identities yet ...")
198
-
199
- with tab_pca:
200
- # Display 2D and 3D PCA
201
- col1, col2 = st.columns(2)
202
- col1.markdown("**PCA 2D**")
203
- disp_pca3d = col1.info("Only available if more than 1 recognized face ...")
204
- col2.markdown("**PCA 3D**")
205
- disp_pca2d = col2.info("Only available if more than 1 recognized face ...")
206
- freeze_pcas = st.button("Freeze PCAs for Interaction", key="reset_pca")
207
-
208
- # Show PCAs
209
- if freeze_pcas and gallery:
210
- col1, col2 = st.columns(2)
211
- if len(st.session_state.matches) > 1:
212
- col1.plotly_chart(
213
- pca(
214
- st.session_state.matches,
215
- st.session_state.identities,
216
- gallery,
217
- dim=3,
218
- ),
219
- use_container_width=True,
220
- )
221
- col2.plotly_chart(
222
- pca(
223
- st.session_state.matches,
224
- st.session_state.identities,
225
- gallery,
226
- dim=2,
227
- ),
228
- use_container_width=True,
229
  )
230
 
 
 
 
 
 
231
 
232
- # Show Gallery Identities
233
- if gallery:
234
- disp_identities_gal.image(
235
- image=[identity.face_aligned for identity in gallery],
236
- caption=[match.name for match in gallery],
 
 
237
  )
238
- else:
239
- disp_identities_gal.info("No gallery images uploaded yet ...")
240
 
 
 
241
 
242
- # Display Live Stats
243
  if ctx.state.playing:
244
- while True:
245
- # Retrieve data from other thread
246
- stats, detections, identities, matches = transfer_queue.get()
247
-
248
- # Save for PCA Snapshot
249
- st.session_state.identities = identities
250
- st.session_state.matches = matches
251
-
252
- # Show Stats
253
- disp_stats.dataframe(
254
- pd.DataFrame([stats]).applymap(lambda x: (format_dflist(x))),
255
- use_container_width=True,
256
- )
257
-
258
- # Show Detections Metrics
259
- if detections:
260
- disp_detection_metrics.dataframe(
261
- pd.DataFrame(detections).applymap(lambda x: (format_dflist(x))),
262
- use_container_width=True,
263
- )
264
- else:
265
- disp_detection_metrics.info("No detected faces yet ...")
266
-
267
- # Show Match Metrics
268
- if matches:
269
- disp_recognition_metrics.dataframe(
270
- pd.DataFrame(matches).applymap(lambda x: (format_dflist(x))),
271
- use_container_width=True,
272
- )
273
- else:
274
- disp_recognition_metrics.info("No recognized identities yet ...")
275
-
276
- if len(matches) > 1:
277
- disp_pca3d.plotly_chart(pca(matches, identities, gallery, dim=3), use_container_width=True)
278
- disp_pca2d.plotly_chart(pca(matches, identities, gallery, dim=2), use_container_width=True)
279
- else:
280
- disp_pca3d.info("Only available if more than 1 recognized face ...")
281
- disp_pca2d.info("Only available if more than 1 recognized face ...")
282
-
283
- # Show Recognized Identities
284
- if matches:
285
- disp_identities_rec.image(
286
- image=[identities[match.identity_idx].face_aligned for match in matches],
287
- caption=[gallery[match.gallery_idx].name for match in matches],
288
- )
289
- else:
290
- disp_identities_rec.info("No recognized identities yet ...")
291
 
292
- # BUG Recognized Identity Image is not updating on cloud version? (works on local!!!)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import time
3
  from typing import List
4
  from streamlit_webrtc import webrtc_streamer, WebRtcMode
 
5
  import av
6
+ import numpy as np
7
+ import onnxruntime as rt
8
+ import threading
9
+ import mediapipe as mp
10
+ import os
11
+ from twilio.rest import Client
12
+ import cv2
13
+ from skimage.transform import SimilarityTransform
14
+ from types import SimpleNamespace
15
+ from sklearn.metrics.pairwise import cosine_distances
16
 
17
 
18
+ class Detection(SimpleNamespace):
19
+ bbox: List[List[float]] = None
20
+ landmarks: List[List[float]] = None
21
 
22
 
23
+ class Identity(SimpleNamespace):
24
+ detection: Detection = Detection()
25
+ name: str = None
26
+ embedding: np.ndarray = None
27
+ face: np.ndarray = None
28
+
29
+
30
+ class Match(SimpleNamespace):
31
+ subject_id: Identity = Identity()
32
+ gallery_id: Identity = Identity()
33
+ distance: float = None
34
+ name: str = None
35
+
36
+
37
+ class Grabber(object):
38
+ def __init__(self, video_receiver) -> None:
39
+ self.currentFrame = None
40
+ self.capture = video_receiver
41
+ self.thread = threading.Thread(target=self.update_frame)
42
+ self.thread.daemon = True
43
+
44
+ def update_frame(self) -> None:
45
+ while True:
46
+ self.currentFrame = self.capture.get_frame()
47
+
48
+ def get_frame(self) -> av.VideoFrame:
49
+ return self.currentFrame
50
+
51
+
52
+ # Similarity threshold for face matching
53
+ SIMILARITY_THRESHOLD = 1.2
54
+
55
+ # Get twilio ice server configuration using twilio credentials from environment variables (set in streamlit secrets)
56
+ # Ref: https://www.twilio.com/docs/stun-turn/api
57
+ ICE_SERVERS = Client(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"]).tokens.create().ice_servers
58
+
59
  # Set page layout for streamlit to wide
60
+ st.set_page_config(layout="wide", page_title="Live Face Recognition", page_icon=":sunglasses:")
 
 
 
 
 
 
 
 
 
 
61
 
62
+ # Streamlit app
63
+ st.title("Live Webcam Face Recognition")
 
 
 
 
 
 
64
 
65
+ st.markdown("**Live Stream**")
66
+ ctx_container = st.container()
67
+ stream_container = st.empty()
68
+
69
+ st.markdown("**Matches**")
70
+ matches_container = st.info("No matches found yet ...")
71
+
72
+
73
+ # Init face detector and face recognizer
74
+ face_recognizer = rt.InferenceSession("model.fixed.onnx", providers=rt.get_available_providers())
75
+ face_detector = mp.solutions.face_mesh.FaceMesh(
76
+ refine_landmarks=True,
77
+ min_detection_confidence=0.5,
78
+ min_tracking_confidence=0.5,
79
+ max_num_faces=5,
80
+ )
81
+
82
+
83
+ def detect_faces(frame: np.ndarray) -> List[Detection]:
84
+ # Process the frame with the face detector
85
+ result = face_detector.process(frame)
86
+
87
+ # Initialize an empty list to store the detected faces
88
+ detections = []
89
+
90
+ # Check if any faces were detected
91
+ if result.multi_face_landmarks:
92
+ # Iterate over each detected face
93
+ for count, detection in enumerate(result.multi_face_landmarks):
94
+ # Select 5 Landmarks
95
+ five_landmarks = np.asarray(detection.landmark)[[470, 475, 1, 57, 287]]
96
+
97
+ # Extract the x and y coordinates of the landmarks of interest
98
+ landmarks = [[landmark.x * frame.shape[1], landmark.y * frame.shape[0]] for landmark in five_landmarks]
99
+
100
+ # Extract the x and y coordinates of all landmarks
101
+ all_x_coords = [landmark.x * frame.shape[1] for landmark in detection.landmark]
102
+ all_y_coords = [landmark.y * frame.shape[0] for landmark in detection.landmark]
103
+
104
+ # Compute the bounding box of the face
105
+ x_min, x_max = int(min(all_x_coords)), int(max(all_x_coords))
106
+ y_min, y_max = int(min(all_y_coords)), int(max(all_y_coords))
107
+ bbox = [[x_min, y_min], [x_max, y_max]]
108
+
109
+ # Create a Detection object for the face
110
+ detection = Detection(
111
+ idx=count,
112
+ bbox=bbox,
113
+ landmarks=landmarks,
114
+ confidence=None,
115
+ )
116
+
117
+ # Add the detection to the list
118
+ detections.append(detection)
119
+
120
+ # Return the list of detections
121
+ return detections
122
+
123
+
124
+ def recognize_faces(frame: np.ndarray, detections: List[Detection]) -> List[Identity]:
125
+ if not detections:
126
+ return []
127
+
128
+ identities = []
129
+ for detection in detections:
130
+ # ALIGNMENT -----------------------------------------------------------
131
+ # Target landmark coordinates (as used in training)
132
+ landmarks_target = np.array(
133
+ [
134
+ [38.2946, 51.6963],
135
+ [73.5318, 51.5014],
136
+ [56.0252, 71.7366],
137
+ [41.5493, 92.3655],
138
+ [70.7299, 92.2041],
139
+ ],
140
+ dtype=np.float32,
141
  )
142
+ tform = SimilarityTransform()
143
+ tform.estimate(detection.landmarks, landmarks_target)
144
+ tmatrix = tform.params[0:2, :]
145
+ face_aligned = cv2.warpAffine(frame, tmatrix, (112, 112), borderValue=0.0)
146
+ # ---------------------------------------------------------------------
147
+
148
+ # INFERENCE -----------------------------------------------------------
149
+ # Inference face embeddings with onnxruntime
150
+ input_image = (np.asarray([face_aligned]).astype(np.float32) / 255.0).clip(0.0, 1.0)
151
+ embedding = face_recognizer.run(None, {"input_image": input_image})[0][0]
152
+ # ---------------------------------------------------------------------
153
+
154
+ # Create Identity object
155
+ identities.append(Identity(detection=detection, embedding=embedding, face=face_aligned))
156
+
157
+ return identities
158
+
159
+
160
+ def match_faces(subjects: List[Identity], gallery: List[Identity]) -> List[Match]:
161
+ if len(gallery) == 0 or len(subjects) == 0:
162
+ return []
163
+
164
+ # Get Embeddings
165
+ embs_gal = np.asarray([identity.embedding for identity in gallery])
166
+ embs_det = np.asarray([identity.embedding for identity in subjects])
167
+
168
+ # Calculate Cosine Distances
169
+ cos_distances = cosine_distances(embs_det, embs_gal)
170
+
171
+ # Find Matches
172
+ matches = []
173
+ for ident_idx, identity in enumerate(subjects):
174
+ dists_to_identity = cos_distances[ident_idx]
175
+ idx_min = np.argmin(dists_to_identity)
176
+ if dists_to_identity[idx_min] < SIMILARITY_THRESHOLD:
177
+ matches.append(
178
+ Match(
179
+ subject_id=identity,
180
+ gallery_id=gallery[idx_min],
181
+ distance=dists_to_identity[idx_min],
182
+ )
183
+ )
184
 
185
+ # Sort Matches by identity_idx
186
+ matches = sorted(matches, key=lambda match: match.gallery_id.name)
187
+
188
+ return matches
189
+
190
+
191
+ def draw_annotations(frame: np.ndarray, detections: List[Detection], matches: List[Match]) -> np.ndarray:
192
+ global timestamp
193
+ shape = np.asarray(frame.shape[:2][::-1])
194
+
195
+ # Upscale frame to 1080p for better visualization of drawn annotations
196
+ frame = cv2.resize(frame, (1920, 1080))
197
+ upscale_factor = np.asarray([1920 / shape[0], 1080 / shape[1]])
198
+ shape = np.asarray(frame.shape[:2][::-1])
199
+
200
+ # Make frame writeable (for better performance)
201
+ frame.flags.writeable = True
202
+
203
+ fps = 1 / (time.time() - timestamp)
204
+ timestamp = time.time()
205
+
206
+ # Draw FPS
207
+ cv2.putText(
208
+ frame,
209
+ f"FPS: {fps:.1f}",
210
+ (20, 40),
211
+ cv2.FONT_HERSHEY_SIMPLEX,
212
+ 1,
213
+ (0, 255, 0),
214
+ 2,
215
  )
216
 
217
+ # Draw Detections
218
+ for detection in detections:
219
+ # Draw Landmarks
220
+ for landmark in detection.landmarks:
221
+ cv2.circle(
222
+ frame,
223
+ (landmark * upscale_factor).astype(int),
224
+ 2,
225
+ (255, 255, 255),
226
+ -1,
227
+ )
228
 
229
+ # Draw Bounding Box
230
+ cv2.rectangle(
231
+ frame,
232
+ (detection.bbox[0] * upscale_factor).astype(int),
233
+ (detection.bbox[1] * upscale_factor).astype(int),
234
+ (255, 0, 0),
235
+ 2,
236
+ )
237
 
238
+ # Draw Index
239
+ cv2.putText(
240
+ frame,
241
+ str(detection.idx),
242
+ (
243
+ ((detection.bbox[1][0] + 2) * upscale_factor[0]).astype(int),
244
+ ((detection.bbox[1][1] + 2) * upscale_factor[1]).astype(int),
245
+ ),
246
+ cv2.LINE_AA,
247
+ 0.5,
248
+ (0, 0, 0),
249
+ 2,
250
+ )
251
 
252
+ # Draw Matches
253
+ for match in matches:
254
+ detection = match.subject_id.detection
255
+ name = match.gallery_id.name
256
+
257
+ # Draw Bounding Box in green
258
+ cv2.rectangle(
259
+ frame,
260
+ (detection.bbox[0] * upscale_factor).astype(int),
261
+ (detection.bbox[1] * upscale_factor).astype(int),
262
+ (0, 255, 0),
263
+ 2,
264
+ )
265
 
266
+ # Draw Banner
267
+ cv2.rectangle(
268
+ frame,
269
+ (
270
+ (detection.bbox[0][0] * upscale_factor[0]).astype(int),
271
+ (detection.bbox[0][1] * upscale_factor[1] - (shape[1] // 25)).astype(int),
272
+ ),
273
+ (
274
+ (detection.bbox[1][0] * upscale_factor[0]).astype(int),
275
+ (detection.bbox[0][1] * upscale_factor[1]).astype(int),
276
+ ),
277
+ (255, 255, 255),
278
+ -1,
279
+ )
280
 
281
+ # Draw Name
282
+ cv2.putText(
283
+ frame,
284
+ name,
285
+ (
286
+ ((detection.bbox[0][0] + shape[0] // 400) * upscale_factor[0]).astype(int),
287
+ ((detection.bbox[0][1] - shape[1] // 50) * upscale_factor[1]).astype(int),
288
+ ),
289
+ cv2.LINE_AA,
290
+ 0.7,
291
+ (0, 0, 0),
292
+ 2,
293
+ )
294
 
295
+ # Draw Distance
296
+ cv2.putText(
297
+ frame,
298
+ f" Distance: {match.distance:.2f}",
299
+ (
300
+ ((detection.bbox[0][0] + shape[0] // 400) * upscale_factor[0]).astype(int),
301
+ ((detection.bbox[0][1] - shape[1] // 350) * upscale_factor[1]).astype(int),
302
+ ),
303
+ cv2.LINE_AA,
304
+ 0.5,
305
+ (0, 0, 0),
306
+ 2,
307
+ )
308
 
309
+ return frame
 
310
 
 
 
311
 
312
+ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
313
  # Convert frame to numpy array
314
  frame = frame.to_ndarray(format="rgb24")
315
 
316
+ # Run face detection
317
+ detections = detect_faces(frame)
 
 
 
 
 
 
 
 
318
 
319
+ # Run face recognition
320
+ subjects = recognize_faces(frame, detections)
 
 
321
 
322
+ # Run face matching
323
+ matches = match_faces(subjects, gallery)
 
 
324
 
325
+ # Draw annotations
326
+ frame = draw_annotations(frame, detections, matches)
 
 
327
 
328
  # Convert frame back to av.VideoFrame
329
  frame = av.VideoFrame.from_ndarray(frame, format="rgb24")
330
 
331
+ return frame, matches
 
332
 
 
 
333
 
334
+ # Sidebar for face gallery
335
+ with st.sidebar:
336
+ st.markdown("# Face Gallery")
337
+ files = st.sidebar.file_uploader(
338
+ "Upload images to gallery",
339
+ type=["png", "jpg", "jpeg"],
340
+ accept_multiple_files=True,
341
+ label_visibility="collapsed",
342
+ )
343
 
344
+ # Init gallery
345
+ gallery = []
346
+ for file in files:
347
+ # Read file bytes
348
+ file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
349
 
350
+ # Decode image and convert from BGR to RGB
351
+ img = cv2.cvtColor(cv2.imdecode(file_bytes, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
352
 
353
+ # Detect faces
354
+ detections = detect_faces(img)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
+ if detections:
357
+ # recognize faces
358
+ subjects = recognize_faces(img, detections[:1])
359
+
360
+ # Add subjects to gallery
361
+ gallery.append(
362
+ Identity(
363
+ name=os.path.splitext(file.name)[0],
364
+ embedding=subjects[0].embedding,
365
+ face=subjects[0].face,
366
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  )
368
 
369
+ # Show gallery images
370
+ st.image(
371
+ image=[identity.face for identity in gallery],
372
+ caption=[identity.name for identity in gallery],
373
+ )
374
 
375
+ # Start streaming component
376
+ with ctx_container:
377
+ ctx = webrtc_streamer(
378
+ key="LiveFaceRecognition",
379
+ mode=WebRtcMode.SENDONLY,
380
+ rtc_configuration={"iceServers": ICE_SERVERS},
381
+ media_stream_constraints={"video": {"width": 1920}, "audio": False},
382
  )
 
 
383
 
384
+ # Initialize frame grabber
385
+ grabber = Grabber(ctx.video_receiver)
386
 
 
387
  if ctx.state.playing:
388
+ # Start frame grabber in background thread
389
+ grabber.thread.start()
390
+ timestamp = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
+ # Start main loop
393
+ while True:
394
+ frame = grabber.get_frame()
395
+ if frame is not None:
396
+ # Print frame timestamp to streamlit
397
+ st.write(f"Frame timestamp: {frame.time}")
398
+
399
+ # Run face detection and recognition
400
+ frame, matches = video_frame_callback(frame)
401
+
402
+ # Convert frame to numpy array
403
+ frame = frame.to_ndarray(format="rgb24")
404
+
405
+ # Show Stream
406
+ stream_container.image(frame, channels="RGB")
407
+
408
+ # Show Matches
409
+ if matches:
410
+ matches_container.image(
411
+ image=[match.subject_id.face for match in matches],
412
+ caption=[match.gallery_id.name for match in matches],
413
+ )
414
+ else:
415
+ matches_container.info("No matches found yet ...")
requirements.txt CHANGED
@@ -1,13 +1,9 @@
1
  streamlit
2
  scikit-image
3
  scikit-learn
4
- mediapipe
5
  opencv-python-headless
6
  watchdog
7
  streamlit-webrtc
8
- matplotlib
9
- streamlit-toggle-switch
10
- tflite-runtime
11
  twilio
12
- tqdm
13
- plotly
 
1
  streamlit
2
  scikit-image
3
  scikit-learn
 
4
  opencv-python-headless
5
  watchdog
6
  streamlit-webrtc
 
 
 
7
  twilio
8
+ onnxruntime
9
+ mediapipe
tools/__init__.py DELETED
File without changes
tools/annotation.py DELETED
@@ -1,107 +0,0 @@
1
- import numpy as np
2
- import cv2
3
-
4
-
5
- class Annotation:
6
- def __init__(self, draw_bbox=True, draw_landmarks=True, draw_name=True, upscale=True):
7
- self.bbox = draw_bbox
8
- self.landmarks = draw_landmarks
9
- self.name = draw_name
10
- self.upscale = upscale
11
-
12
- def __call__(self, frame, detections, identities, matches, gallery):
13
- shape = np.asarray(frame.shape[:2][::-1])
14
- if self.upscale:
15
- frame = cv2.resize(frame, (1920, 1080))
16
- upscale_factor = np.asarray([1920 / shape[0], 1080 / shape[1]])
17
- shape = np.asarray(frame.shape[:2][::-1])
18
- else:
19
- upscale_factor = np.asarray([1, 1])
20
-
21
- frame.flags.writeable = True
22
-
23
- for detection in detections:
24
- # Draw Landmarks
25
- if self.landmarks:
26
- for landmark in detection.landmarks:
27
- cv2.circle(
28
- frame,
29
- (landmark * upscale_factor).astype(int),
30
- 2,
31
- (255, 255, 255),
32
- -1,
33
- )
34
-
35
- # Draw Bounding Box
36
- if self.bbox:
37
- cv2.rectangle(
38
- frame,
39
- (detection.bbox[0] * upscale_factor).astype(int),
40
- (detection.bbox[1] * upscale_factor).astype(int),
41
- (255, 0, 0),
42
- 2,
43
- )
44
-
45
- # Draw Index
46
- cv2.putText(
47
- frame,
48
- str(detection.idx),
49
- (
50
- ((detection.bbox[1][0] + 2) * upscale_factor[0]).astype(int),
51
- ((detection.bbox[1][1] + 2) * upscale_factor[1]).astype(int),
52
- ),
53
- cv2.LINE_AA,
54
- 0.5,
55
- (0, 0, 0),
56
- 2,
57
- )
58
-
59
- # Draw Name
60
- if self.name:
61
- for match in matches:
62
- try:
63
- detection = detections[identities[match.identity_idx].detection_idx]
64
- except:
65
- print("Identity IDX: ", match.identity_idx)
66
- print("Len(Detections): ", len(detections))
67
- print("Len(Identites): ", len(identities))
68
- print("Detection IDX: ", identities[match.identity_idx].detection_idx)
69
-
70
- # print("Detections: ", detections)
71
-
72
- cv2.rectangle(
73
- frame,
74
- (detection.bbox[0] * upscale_factor).astype(int),
75
- (detection.bbox[1] * upscale_factor).astype(int),
76
- (0, 255, 0),
77
- 2,
78
- )
79
-
80
- cv2.rectangle(
81
- frame,
82
- (
83
- (detection.bbox[0][0] * upscale_factor[0]).astype(int),
84
- (detection.bbox[0][1] * upscale_factor[1] - (shape[1] // 25)).astype(int),
85
- ),
86
- (
87
- (detection.bbox[1][0] * upscale_factor[0]).astype(int),
88
- (detection.bbox[0][1] * upscale_factor[1]).astype(int),
89
- ),
90
- (255, 255, 255),
91
- -1,
92
- )
93
-
94
- cv2.putText(
95
- frame,
96
- gallery[match.gallery_idx].name,
97
- (
98
- ((detection.bbox[0][0] + shape[0] // 400) * upscale_factor[0]).astype(int),
99
- ((detection.bbox[0][1] - shape[1] // 100) * upscale_factor[1]).astype(int),
100
- ),
101
- cv2.LINE_AA,
102
- 0.5,
103
- (0, 0, 0),
104
- 2,
105
- )
106
-
107
- return frame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/face_detection.py DELETED
@@ -1,481 +0,0 @@
1
- import tflite_runtime.interpreter as tflite
2
- import cv2
3
- import numpy as np
4
- from .utils import tflite_inference
5
- from .nametypes import Detection
6
- from .utils import get_file
7
-
8
-
9
- BASE_URL = "https://github.com/Martlgap/FaceIDLight/releases/download/v.0.1/"
10
-
11
- FILE_HASHES = {
12
- "o_net": "768385d570300648b7b881acbd418146522b79b4771029bb2e684bdd8c764b9f",
13
- "p_net": "530183192e24f7cc86b6706e1eb600482c4ed4306399ac939c472e3957bae15e",
14
- "r_net": "5ec33b065eb2802bc4c2575d21feff1a56958d854785bc3e2907d3b7ace861a2",
15
- }
16
-
17
-
18
- class StageStatus:
19
- """
20
- Keeps status between MTCNN stages
21
- """
22
-
23
- def __init__(self, pad_result: tuple = None, width=0, height=0):
24
- self.width = width
25
- self.height = height
26
- self.dy = self.edy = self.dx = self.edx = self.y = self.ey = self.x = self.ex = self.tmp_w = self.tmp_h = []
27
-
28
- if pad_result is not None:
29
- self.update(pad_result)
30
-
31
- def update(self, pad_result: tuple):
32
- s = self
33
- s.dy, s.edy, s.dx, s.edx, s.y, s.ey, s.x, s.ex, s.tmp_w, s.tmp_h = pad_result
34
-
35
-
36
- class FaceDetection:
37
- """
38
- Allows to perform MTCNN Detection ->
39
- a) Detection of faces (with the confidence probability)
40
- b) Detection of keypoints (left eye, right eye, nose, mouth_left, mouth_right)
41
- """
42
-
43
- def __init__(
44
- self,
45
- min_face_size: int = 40,
46
- steps_threshold: list = None,
47
- scale_factor: float = 0.7,
48
- min_detections_conf: float = 0.9,
49
- ):
50
- """
51
- Initializes the MTCNN.
52
- :param min_face_size: minimum size of the face to detect
53
- :param steps_threshold: step's thresholds values
54
- :param scale_factor: scale factor
55
- """
56
- if steps_threshold is None:
57
- steps_threshold = [0.6, 0.7, 0.7] # original mtcnn values [0.6, 0.7, 0.7]
58
- self._min_face_size = min_face_size
59
- self._steps_threshold = steps_threshold
60
- self._scale_factor = scale_factor
61
- self.min_detections_conf = min_detections_conf
62
- self.p_net = tflite.Interpreter(model_path=get_file(BASE_URL + "p_net.tflite", FILE_HASHES["p_net"]))
63
- self.r_net = tflite.Interpreter(model_path=get_file(BASE_URL + "r_net.tflite", FILE_HASHES["r_net"]))
64
- self.o_net = tflite.Interpreter(model_path=get_file(BASE_URL + "o_net.tflite", FILE_HASHES["o_net"]))
65
-
66
- def __call__(self, frame):
67
- """
68
- Detects bounding boxes from the specified image.
69
- :param img: image to process
70
- :return: list containing all the bounding boxes detected with their keypoints.
71
-
72
- From MTCNN:
73
- # Total boxes (bBoxes for faces)
74
- # 1. dim -> Number of found Faces
75
- # 2. dim -> x_min, y_min, x_max, y_max, score
76
-
77
- # Points (Landmarks left eye, right eye, nose, left mouth, right mouth)
78
- # 1. dim -> Number of found Faces
79
- # 2. dim -> x1, x2, x3, x4, x5, y2, y2, y3, y4, y5 Coordinates
80
- """
81
-
82
- height, width, _ = frame.shape
83
- stage_status = StageStatus(width=width, height=height)
84
- m = 12 / self._min_face_size
85
- min_layer = np.amin([height, width]) * m
86
- scales = self.__compute_scale_pyramid(m, min_layer)
87
-
88
- # We pipe here each of the stages
89
- total_boxes, stage_status = self.__stage1(frame, scales, stage_status)
90
- total_boxes, stage_status = self.__stage2(frame, total_boxes, stage_status)
91
- bboxes, points = self.__stage3(frame, total_boxes, stage_status)
92
-
93
- # Sort by location (to prevent flickering)
94
- sort_idx = np.argsort(bboxes[:, 0])
95
- bboxes = bboxes[sort_idx]
96
- points = points[sort_idx]
97
-
98
- # Transform to better shape and points now inside bbox
99
- detections = []
100
- cnt = 0
101
- for i in range(bboxes.shape[0]):
102
- conf = bboxes[i, -1].astype(np.float32)
103
- if conf > self.min_detections_conf:
104
- bboxes_c = np.reshape(bboxes[i, :-1], [2, 2]).astype(np.float32)
105
- points_c = np.reshape(points[i], [2, 5]).transpose().astype(np.float32)
106
- detections.append(
107
- Detection(
108
- idx=cnt,
109
- bbox=list(bboxes_c),
110
- landmarks=list(points_c),
111
- confidence=conf,
112
- )
113
- )
114
- cnt += 1
115
- return frame, detections
116
-
117
- def __compute_scale_pyramid(self, m, min_layer):
118
- scales = []
119
- factor_count = 0
120
-
121
- while min_layer >= 12:
122
- scales += [m * np.power(self._scale_factor, factor_count)]
123
- min_layer = min_layer * self._scale_factor
124
- factor_count += 1
125
-
126
- return scales
127
-
128
- @staticmethod
129
- def __scale_image(image, scale: float):
130
- """
131
- Scales the image to a given scale.
132
- :param image:
133
- :param scale:
134
- :return:
135
- """
136
- height, width, _ = image.shape
137
-
138
- width_scaled = int(np.ceil(width * scale))
139
- height_scaled = int(np.ceil(height * scale))
140
-
141
- im_data = cv2.resize(image, (width_scaled, height_scaled), interpolation=cv2.INTER_AREA)
142
-
143
- # Normalize the image's pixels
144
- im_data_normalized = (im_data - 127.5) * 0.0078125
145
-
146
- return im_data_normalized
147
-
148
- @staticmethod
149
- def __generate_bounding_box(imap, reg, scale, t):
150
- # use heatmap to generate bounding boxes
151
- stride = 2
152
- cellsize = 12
153
-
154
- imap = np.transpose(imap)
155
- dx1 = np.transpose(reg[:, :, 0])
156
- dy1 = np.transpose(reg[:, :, 1])
157
- dx2 = np.transpose(reg[:, :, 2])
158
- dy2 = np.transpose(reg[:, :, 3])
159
-
160
- y, x = np.where(imap >= t)
161
-
162
- if y.shape[0] == 1:
163
- dx1 = np.flipud(dx1)
164
- dy1 = np.flipud(dy1)
165
- dx2 = np.flipud(dx2)
166
- dy2 = np.flipud(dy2)
167
-
168
- score = imap[(y, x)]
169
- reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))
170
-
171
- if reg.size == 0:
172
- reg = np.empty(shape=(0, 3))
173
-
174
- bb = np.transpose(np.vstack([y, x]))
175
-
176
- q1 = np.fix((stride * bb + 1) / scale)
177
- q2 = np.fix((stride * bb + cellsize) / scale)
178
- boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
179
-
180
- return boundingbox, reg
181
-
182
- @staticmethod
183
- def __nms(boxes, threshold, method):
184
- """
185
- Non Maximum Suppression.
186
-
187
- :param boxes: np array with bounding boxes.
188
- :param threshold:
189
- :param method: NMS method to apply. Available values ('Min', 'Union')
190
- :return:
191
- """
192
- if boxes.size == 0:
193
- return np.empty((0, 3))
194
-
195
- x1 = boxes[:, 0]
196
- y1 = boxes[:, 1]
197
- x2 = boxes[:, 2]
198
- y2 = boxes[:, 3]
199
- s = boxes[:, 4]
200
-
201
- area = (x2 - x1 + 1) * (y2 - y1 + 1)
202
- sorted_s = np.argsort(s)
203
-
204
- pick = np.zeros_like(s, dtype=np.int16)
205
- counter = 0
206
- while sorted_s.size > 0:
207
- i = sorted_s[-1]
208
- pick[counter] = i
209
- counter += 1
210
- idx = sorted_s[0:-1]
211
-
212
- xx1 = np.maximum(x1[i], x1[idx])
213
- yy1 = np.maximum(y1[i], y1[idx])
214
- xx2 = np.minimum(x2[i], x2[idx])
215
- yy2 = np.minimum(y2[i], y2[idx])
216
-
217
- w = np.maximum(0.0, xx2 - xx1 + 1)
218
- h = np.maximum(0.0, yy2 - yy1 + 1)
219
-
220
- inter = w * h
221
-
222
- if method == "Min":
223
- o = inter / np.minimum(area[i], area[idx])
224
- else:
225
- o = inter / (area[i] + area[idx] - inter)
226
-
227
- sorted_s = sorted_s[np.where(o <= threshold)]
228
-
229
- pick = pick[0:counter]
230
-
231
- return pick
232
-
233
- @staticmethod
234
- def __pad(total_boxes, w, h):
235
- # compute the padding coordinates (pad the bounding boxes to square)
236
- tmp_w = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
237
- tmp_h = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
238
- numbox = total_boxes.shape[0]
239
-
240
- dx = np.ones(numbox, dtype=np.int32)
241
- dy = np.ones(numbox, dtype=np.int32)
242
- edx = tmp_w.copy().astype(np.int32)
243
- edy = tmp_h.copy().astype(np.int32)
244
-
245
- x = total_boxes[:, 0].copy().astype(np.int32)
246
- y = total_boxes[:, 1].copy().astype(np.int32)
247
- ex = total_boxes[:, 2].copy().astype(np.int32)
248
- ey = total_boxes[:, 3].copy().astype(np.int32)
249
-
250
- tmp = np.where(ex > w)
251
- edx.flat[tmp] = np.expand_dims(-ex[tmp] + w + tmp_w[tmp], 1)
252
- ex[tmp] = w
253
-
254
- tmp = np.where(ey > h)
255
- edy.flat[tmp] = np.expand_dims(-ey[tmp] + h + tmp_h[tmp], 1)
256
- ey[tmp] = h
257
-
258
- tmp = np.where(x < 1)
259
- dx.flat[tmp] = np.expand_dims(2 - x[tmp], 1)
260
- x[tmp] = 1
261
-
262
- tmp = np.where(y < 1)
263
- dy.flat[tmp] = np.expand_dims(2 - y[tmp], 1)
264
- y[tmp] = 1
265
-
266
- return dy, edy, dx, edx, y, ey, x, ex, tmp_w, tmp_h
267
-
268
- @staticmethod
269
- def __rerec(bbox):
270
- # convert bbox to square
271
- height = bbox[:, 3] - bbox[:, 1]
272
- width = bbox[:, 2] - bbox[:, 0]
273
- max_side_length = np.maximum(width, height)
274
- bbox[:, 0] = bbox[:, 0] + width * 0.5 - max_side_length * 0.5
275
- bbox[:, 1] = bbox[:, 1] + height * 0.5 - max_side_length * 0.5
276
- bbox[:, 2:4] = bbox[:, 0:2] + np.transpose(np.tile(max_side_length, (2, 1)))
277
- return bbox
278
-
279
- @staticmethod
280
- def __bbreg(boundingbox, reg):
281
- # calibrate bounding boxes
282
- if reg.shape[1] == 1:
283
- reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
284
-
285
- w = boundingbox[:, 2] - boundingbox[:, 0] + 1
286
- h = boundingbox[:, 3] - boundingbox[:, 1] + 1
287
- b1 = boundingbox[:, 0] + reg[:, 0] * w
288
- b2 = boundingbox[:, 1] + reg[:, 1] * h
289
- b3 = boundingbox[:, 2] + reg[:, 2] * w
290
- b4 = boundingbox[:, 3] + reg[:, 3] * h
291
- boundingbox[:, 0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
292
- return boundingbox
293
-
294
- def __stage1(self, image, scales: list, stage_status: StageStatus):
295
- """
296
- First stage of the MTCNN.
297
- :param image:
298
- :param scales:
299
- :param stage_status:
300
- :return:
301
- """
302
- total_boxes = np.empty((0, 9))
303
- status = stage_status
304
-
305
- for scale in scales:
306
- scaled_image = self.__scale_image(image, scale)
307
-
308
- img_x = np.expand_dims(scaled_image, 0)
309
- img_y = np.transpose(img_x, (0, 2, 1, 3))
310
-
311
- out = tflite_inference(self.p_net, img_y)
312
-
313
- out0 = np.transpose(out[0], (0, 2, 1, 3))
314
- out1 = np.transpose(out[1], (0, 2, 1, 3))
315
-
316
- boxes, _ = self.__generate_bounding_box(
317
- out1[0, :, :, 1].copy(),
318
- out0[0, :, :, :].copy(),
319
- scale,
320
- self._steps_threshold[0],
321
- )
322
-
323
- # inter-scale nms
324
- pick = self.__nms(boxes.copy(), 0.5, "Union")
325
- if boxes.size > 0 and pick.size > 0:
326
- boxes = boxes[pick, :]
327
- total_boxes = np.append(total_boxes, boxes, axis=0)
328
-
329
- numboxes = total_boxes.shape[0]
330
-
331
- if numboxes > 0:
332
- pick = self.__nms(total_boxes.copy(), 0.7, "Union")
333
- total_boxes = total_boxes[pick, :]
334
-
335
- regw = total_boxes[:, 2] - total_boxes[:, 0]
336
- regh = total_boxes[:, 3] - total_boxes[:, 1]
337
-
338
- qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
339
- qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
340
- qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
341
- qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
342
-
343
- total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
344
- total_boxes = self.__rerec(total_boxes.copy())
345
-
346
- total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
347
- status = StageStatus(
348
- self.__pad(total_boxes.copy(), stage_status.width, stage_status.height),
349
- width=stage_status.width,
350
- height=stage_status.height,
351
- )
352
-
353
- return total_boxes, status
354
-
355
- def __stage2(self, img, total_boxes, stage_status: StageStatus):
356
- """
357
- Second stage of the MTCNN.
358
- :param img:
359
- :param total_boxes:
360
- :param stage_status:
361
- :return:
362
- """
363
-
364
- num_boxes = total_boxes.shape[0]
365
- if num_boxes == 0:
366
- return total_boxes, stage_status
367
-
368
- # second stage
369
- tempimg = np.zeros(shape=(24, 24, 3, num_boxes))
370
-
371
- for k in range(0, num_boxes):
372
- tmp = np.zeros((int(stage_status.tmp_h[k]), int(stage_status.tmp_w[k]), 3))
373
-
374
- tmp[
375
- stage_status.dy[k] - 1 : stage_status.edy[k],
376
- stage_status.dx[k] - 1 : stage_status.edx[k],
377
- :,
378
- ] = img[
379
- stage_status.y[k] - 1 : stage_status.ey[k],
380
- stage_status.x[k] - 1 : stage_status.ex[k],
381
- :,
382
- ]
383
-
384
- if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
385
- tempimg[:, :, :, k] = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_AREA)
386
-
387
- else:
388
- return np.empty(shape=(0,)), stage_status
389
-
390
- tempimg = (tempimg - 127.5) * 0.0078125
391
- tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
392
-
393
- out = tflite_inference(self.r_net, tempimg1)
394
-
395
- out0 = np.transpose(out[0])
396
- out1 = np.transpose(out[1])
397
-
398
- score = out1[1, :]
399
-
400
- ipass = np.where(score > self._steps_threshold[1])
401
-
402
- total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
403
-
404
- mv = out0[:, ipass[0]]
405
-
406
- if total_boxes.shape[0] > 0:
407
- pick = self.__nms(total_boxes, 0.7, "Union")
408
- total_boxes = total_boxes[pick, :]
409
- total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
410
- total_boxes = self.__rerec(total_boxes.copy())
411
-
412
- return total_boxes, stage_status
413
-
414
- def __stage3(self, img, total_boxes, stage_status: StageStatus):
415
- """
416
- Third stage of the MTCNN.
417
-
418
- :param img:
419
- :param total_boxes:
420
- :param stage_status:
421
- :return:
422
- """
423
- num_boxes = total_boxes.shape[0]
424
- if num_boxes == 0:
425
- return total_boxes, np.empty(shape=(0,))
426
-
427
- total_boxes = np.fix(total_boxes).astype(np.int32)
428
-
429
- status = StageStatus(
430
- self.__pad(total_boxes.copy(), stage_status.width, stage_status.height),
431
- width=stage_status.width,
432
- height=stage_status.height,
433
- )
434
-
435
- tempimg = np.zeros((48, 48, 3, num_boxes))
436
-
437
- for k in range(0, num_boxes):
438
- tmp = np.zeros((int(status.tmp_h[k]), int(status.tmp_w[k]), 3))
439
-
440
- tmp[status.dy[k] - 1 : status.edy[k], status.dx[k] - 1 : status.edx[k], :] = img[
441
- status.y[k] - 1 : status.ey[k], status.x[k] - 1 : status.ex[k], :
442
- ]
443
-
444
- if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
445
- tempimg[:, :, :, k] = cv2.resize(tmp, (48, 48), interpolation=cv2.INTER_AREA)
446
- else:
447
- return np.empty(shape=(0,)), np.empty(shape=(0,))
448
-
449
- tempimg = (tempimg - 127.5) * 0.0078125
450
- tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
451
-
452
- out = tflite_inference(self.o_net, tempimg1)
453
- out0 = np.transpose(out[0])
454
- out1 = np.transpose(out[1])
455
- out2 = np.transpose(out[2])
456
-
457
- score = out2[1, :]
458
-
459
- points = out1
460
-
461
- ipass = np.where(score > self._steps_threshold[2])
462
-
463
- points = points[:, ipass[0]]
464
-
465
- total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
466
-
467
- mv = out0[:, ipass[0]]
468
-
469
- w = total_boxes[:, 2] - total_boxes[:, 0] + 1
470
- h = total_boxes[:, 3] - total_boxes[:, 1] + 1
471
-
472
- points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
473
- points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
474
-
475
- if total_boxes.shape[0] > 0:
476
- total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv))
477
- pick = self.__nms(total_boxes.copy(), 0.7, "Min")
478
- total_boxes = total_boxes[pick, :]
479
- points = points[:, pick]
480
-
481
- return total_boxes, points.transpose()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/face_recognition.py DELETED
@@ -1,114 +0,0 @@
1
- from .utils import tflite_inference
2
- from .nametypes import Identity, Match
3
- from sklearn.metrics.pairwise import cosine_distances
4
- import numpy as np
5
- import cv2
6
- from skimage.transform import SimilarityTransform
7
- from .utils import get_file
8
- import tflite_runtime.interpreter as tflite
9
- from typing import Literal
10
-
11
-
12
- BASE_URL = "https://github.com/Martlgap/FaceIDLight/releases/download/v.0.1/"
13
-
14
- FILE_HASHES = {
15
- "mobileNet": "6c19b789f661caa8da735566490bfd8895beffb2a1ec97a56b126f0539991aa6",
16
- "resNet": "f4d8b0194957a3ad766135505fc70a91343660151a8103bbb6c3b8ac34dbb4e2",
17
- }
18
-
19
-
20
- class FaceRecognition:
21
- def __init__(
22
- self,
23
- min_similarity: float = 0.67,
24
- model_name: Literal["mobileNet", "resNet50"] = "mobileNet",
25
- ):
26
- self.min_similarity = min_similarity
27
- self.model = tflite.Interpreter(model_path=get_file(BASE_URL + f"{model_name}.tflite", FILE_HASHES[model_name]))
28
-
29
- def __call__(self, frame, detections):
30
- # Align Faces
31
- faces, faces_aligned = [], []
32
- for detection in detections:
33
- face = frame[
34
- int(detection.bbox[0][1]) : int(detection.bbox[1][1]),
35
- int(detection.bbox[0][0]) : int(detection.bbox[1][0]),
36
- ]
37
- try:
38
- face = cv2.resize(face, (112, 112))
39
- except:
40
- face = np.zeros((112, 112, 3))
41
-
42
- faces.append(face)
43
- faces_aligned.append(self.align(frame, detection.landmarks))
44
-
45
- # Do Inference
46
- if len(faces_aligned) == 0:
47
- return []
48
-
49
- # Normalize images from [0, 255] to [0, 1]
50
- faces_aligned_norm = np.asarray(faces_aligned).astype(np.float32) / 255.0
51
-
52
- embs_det = tflite_inference(self.model, faces_aligned_norm)
53
- embs_det = np.asarray(embs_det[0])
54
-
55
- # Save Identities
56
- identities = []
57
- for idx, detection in enumerate(detections):
58
- identities.append(
59
- Identity(
60
- detection_idx=detection.idx,
61
- embedding=embs_det[idx],
62
- face_aligned=faces_aligned[idx],
63
- )
64
- )
65
- return identities
66
-
67
- def find_matches(self, identities, gallery):
68
- if len(gallery) == 0 or len(identities) == 0:
69
- return []
70
-
71
- # Get Embeddings
72
- embs_gal = np.asarray([identity.embedding for identity in gallery])
73
- embs_det = np.asarray([identity.embedding for identity in identities])
74
-
75
- # Calculate Cosine Distances
76
- cos_distances = cosine_distances(embs_det, embs_gal)
77
-
78
- # Find Matches
79
- matches = []
80
- for ident_idx, identity in enumerate(identities):
81
- dist_to_identity = cos_distances[ident_idx]
82
- idx_min = np.argmin(dist_to_identity)
83
- if dist_to_identity[idx_min] < self.min_similarity:
84
- matches.append(
85
- Match(
86
- identity_idx=identity.detection_idx,
87
- gallery_idx=idx_min,
88
- distance=dist_to_identity[idx_min],
89
- name=gallery[idx_min].name,
90
- )
91
- )
92
-
93
- # Sort Matches by identity_idx
94
- matches = sorted(matches, key=lambda match: match.gallery_idx)
95
-
96
- return matches
97
-
98
- @staticmethod
99
- def align(img, landmarks_source, target_size=(112, 112)):
100
- landmarks_target = np.array(
101
- [
102
- [38.2946, 51.6963],
103
- [73.5318, 51.5014],
104
- [56.0252, 71.7366],
105
- [41.5493, 92.3655],
106
- [70.7299, 92.2041],
107
- ],
108
- dtype=np.float32,
109
- )
110
- tform = SimilarityTransform()
111
- tform.estimate(landmarks_source, landmarks_target)
112
- tmatrix = tform.params[0:2, :]
113
- face_aligned = cv2.warpAffine(img, tmatrix, target_size, borderValue=0.0)
114
- return face_aligned
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/gallery.py DELETED
@@ -1,37 +0,0 @@
1
- from .face_detection import FaceDetection
2
- from .face_recognition import FaceRecognition
3
- from .nametypes import Identity
4
- import cv2
5
- import os
6
- import numpy as np
7
-
8
-
9
- def init_gallery(files, min_detections_conf=0.8, min_similarity=0.67, model_name="mobileNet"):
10
- face_detector = FaceDetection(min_detections_conf=min_detections_conf)
11
- face_recognizer = FaceRecognition(model_name=model_name, min_similarity=min_similarity)
12
-
13
- gallery = []
14
- for file in files:
15
- file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
16
- img = cv2.cvtColor(cv2.imdecode(file_bytes, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
17
- # Face Detection
18
- img, detections = face_detector(img)
19
-
20
- if detections == []:
21
- continue
22
- elif len(detections) > 1:
23
- detections = detections[:1]
24
-
25
- # Face Recognition
26
- identities = face_recognizer(img, detections)
27
-
28
- # Add to gallery
29
- gallery.append(
30
- Identity(
31
- name=os.path.splitext(file.name)[0],
32
- embedding=identities[0].embedding,
33
- face_aligned=identities[0].face_aligned,
34
- )
35
- )
36
-
37
- return gallery
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/nametypes.py DELETED
@@ -1,33 +0,0 @@
1
- from typing import NamedTuple, List
2
- import numpy as np
3
-
4
-
5
- class Detection(NamedTuple):
6
- idx: int = None
7
- bbox: List[List[float]] = None
8
- landmarks: List[List[float]] = None
9
- confidence: float = None
10
-
11
-
12
- class Identity(NamedTuple):
13
- detection_idx: int = None
14
- name: str = None
15
- embedding: np.ndarray = None
16
- face_aligned: np.ndarray = None
17
-
18
-
19
- class Stats(NamedTuple):
20
- fps: float = 0
21
- resolution: List[int] = [None, None, None]
22
- num_faces: int = 0
23
- detection: float = None
24
- recognition: float = None
25
- matching: float = None
26
- annotation: float = None
27
-
28
-
29
- class Match(NamedTuple):
30
- identity_idx: int = None
31
- gallery_idx: int = None
32
- distance: float = None
33
- name: str = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/pca.py DELETED
@@ -1,59 +0,0 @@
1
- from sklearn.decomposition import PCA
2
- import numpy as np
3
- import plotly.express as px
4
-
5
-
6
- def pca(matches, identities, gallery, dim=3):
7
- """
8
- Perform PCA on embeddings.
9
- Args:
10
- embeddings: np.array of shape (n_embeddings, 512)
11
- Returns:
12
- embeddings_pca: np.array of shape (n_embeddings, 3)
13
- """
14
-
15
- # Get Gallery and Detection Embeddings and stich them together in groups
16
- embeddings = np.concatenate(
17
- [[gallery[match.gallery_idx].embedding, identities[match.identity_idx].embedding] for match in matches],
18
- axis=0,
19
- )
20
-
21
- # Get Identity Names and stich them together in groups
22
- identity_names = np.concatenate(
23
- [[gallery[match.gallery_idx].name, gallery[match.gallery_idx].name] for match in matches],
24
- axis=0,
25
- )
26
-
27
- # Do 3D PCA
28
- pca = PCA(n_components=dim)
29
- pca.fit(embeddings)
30
- embeddings_pca = pca.transform(embeddings)
31
-
32
- if dim == 3:
33
- fig = px.scatter_3d(
34
- embeddings_pca,
35
- x=0,
36
- y=1,
37
- z=2,
38
- opacity=0.7,
39
- color=identity_names,
40
- color_discrete_sequence=px.colors.qualitative.Vivid,
41
- )
42
- fig.update_traces(marker=dict(size=4))
43
- elif dim == 2:
44
- fig = px.scatter(
45
- embeddings_pca,
46
- x=0,
47
- y=1,
48
- opacity=0.7,
49
- color=identity_names,
50
- color_discrete_sequence=px.colors.qualitative.Vivid,
51
- )
52
- fig.update_traces(marker=dict(size=4))
53
- fig.update_xaxes(showgrid=True)
54
- fig.update_yaxes(showgrid=True)
55
- else:
56
- raise ValueError("dim must be either 2 or 3")
57
- fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
58
-
59
- return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/utils.py DELETED
@@ -1,164 +0,0 @@
1
- import logging
2
- import os
3
- import streamlit as st
4
- from twilio.rest import Client
5
- import os
6
- import numpy as np
7
- import hashlib
8
- import tempfile
9
- import os
10
- import hashlib
11
- from tqdm import tqdm
12
- from zipfile import ZipFile
13
- from urllib.request import urlopen
14
-
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
-
19
- @st.cache_data
20
- def get_ice_servers(name="twilio"):
21
- """Get ICE servers from Twilio.
22
- Returns:
23
- List of ICE servers.
24
- """
25
- if name == "twilio":
26
- # Ref: https://www.twilio.com/docs/stun-turn/api
27
- try:
28
- account_sid = os.environ["TWILIO_ACCOUNT_SID"]
29
- auth_token = os.environ["TWILIO_AUTH_TOKEN"]
30
- except KeyError:
31
- logger.warning("Twilio credentials are not set. Fallback to a free STUN server from Google.")
32
- return [{"urls": ["stun:stun.l.google.com:19302"]}]
33
-
34
- client = Client(account_sid, auth_token)
35
-
36
- token = client.tokens.create()
37
-
38
- return token.ice_servers
39
-
40
- elif name == "metered":
41
- try:
42
- username = os.environ["METERED_USERNAME"]
43
- credential = os.environ["METERED_CREDENTIAL"]
44
- except KeyError:
45
- logger.warning("Metered credentials are not set. Fallback to a free STUN server from Google.")
46
- return [{"urls": ["stun:stun.l.google.com:19302"]}]
47
-
48
- ice_servers = [
49
- {"url": "stun:a.relay.metered.ca:80", "urls": "stun:a.relay.metered.ca:80"},
50
- {
51
- "url": "turn:a.relay.metered.ca:80",
52
- "username": username,
53
- "urls": "turn:a.relay.metered.ca:80",
54
- "credential": credential,
55
- },
56
- {
57
- "url": "turn:a.relay.metered.ca:80?transport=tcp",
58
- "username": username,
59
- "urls": "turn:a.relay.metered.ca:80?transport=tcp",
60
- "credential": credential,
61
- },
62
- {
63
- "url": "turn:a.relay.metered.ca:443",
64
- "username": username,
65
- "urls": "turn:a.relay.metered.ca:443",
66
- "credential": credential,
67
- },
68
- {
69
- "url": "turn:a.relay.metered.ca:443?transport=tcp",
70
- "username": username,
71
- "urls": "turn:a.relay.metered.ca:443?transport=tcp",
72
- "credential": credential,
73
- },
74
- ]
75
- return ice_servers
76
- else:
77
- raise ValueError(f"Unknown name: {name}")
78
-
79
-
80
- # Function to format floats within a list
81
- def format_dflist(val):
82
- if isinstance(val, list):
83
- return [format_dflist(num) for num in val]
84
- if isinstance(val, np.ndarray):
85
- return np.asarray([format_dflist(num) for num in val])
86
- if isinstance(val, np.float32):
87
- return f"{val:.2f}"
88
- if isinstance(val, float):
89
- return f"{val:.2f}"
90
- else:
91
- return val
92
-
93
-
94
- def rgb(r, g, b):
95
- return "#{:02x}{:02x}{:02x}".format(r, g, b)
96
-
97
-
98
- def tflite_inference(model, img):
99
- """Inferences an image through the model with tflite interpreter on CPU
100
- :param model: a tflite.Interpreter loaded with a model
101
- :param img: image
102
- :return: list of outputs of the model
103
- """
104
- # Check if img is np.ndarray
105
- if not isinstance(img, np.ndarray):
106
- img = np.asarray(img)
107
-
108
- # Check if dim is 4
109
- if len(img.shape) == 3:
110
- img = np.expand_dims(img, axis=0)
111
-
112
- input_details = model.get_input_details()
113
- output_details = model.get_output_details()
114
- model.resize_tensor_input(input_details[0]["index"], img.shape)
115
- model.allocate_tensors()
116
- model.set_tensor(input_details[0]["index"], img.astype(np.float32))
117
- model.invoke()
118
- return [model.get_tensor(elem["index"]) for elem in output_details]
119
-
120
-
121
- def get_file(origin, file_hash, is_zip=False):
122
- tmp_file = os.path.join(tempfile.gettempdir(), "FaceIDLight", origin.split("/")[-1])
123
- os.makedirs(os.path.dirname(tmp_file), exist_ok=True)
124
- if not os.path.exists(tmp_file):
125
- download = True
126
- else:
127
- hasher = hashlib.sha256()
128
- with open(tmp_file, "rb") as file:
129
- for chunk in iter(lambda: file.read(65535), b""):
130
- hasher.update(chunk)
131
- if not hasher.hexdigest() == file_hash:
132
- print(
133
- "A local file was found, but it seems to be incomplete or outdated because the file hash does not "
134
- "match the original value of " + file_hash + " so data will be downloaded."
135
- )
136
- download = True
137
- else:
138
- download = False
139
-
140
- if download:
141
- response = urlopen(origin)
142
- with tqdm.wrapattr(
143
- open(tmp_file, "wb"),
144
- "write",
145
- miniters=1,
146
- desc="Downloading " + origin.split("/")[-1] + " to: " + tmp_file,
147
- total=getattr(response, "length", None),
148
- ) as file:
149
- for chunk in response:
150
- file.write(chunk)
151
- file.close()
152
- if is_zip:
153
- with ZipFile(tmp_file, "r") as zipObj:
154
- zipObj.extractall(tmp_file.split(".")[0])
155
- tmp_file = os.path.join(tmp_file.split(".")[0])
156
- return tmp_file
157
-
158
-
159
- def get_hash(filepath):
160
- hasher = hashlib.sha256()
161
- with open(filepath, "rb") as file:
162
- for chunk in iter(lambda: file.read(65535), b""):
163
- hasher.update(chunk)
164
- return hasher.hexdigest()