Spaces:
Runtime error
Runtime error
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2021 Imperial College London (Pingchuan Ma)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
import warnings | |
import torchvision | |
import mediapipe as mp | |
import os | |
import cv2 | |
import numpy as np | |
class LandmarksDetector:
    """Locate four facial keypoints in every frame of a video with MediaPipe.

    Two MediaPipe ``FaceDetection`` models are created. The full-range model
    (``model_selection=1``) is tried first; the short-range model
    (``model_selection=0``) is only used as a fallback when the full-range
    model finds no face in any frame of the video.
    """

    # Only the first four entries of a detection's ``relative_keypoints`` are
    # kept. In MediaPipe's ``FaceKeyPoint`` enum these are right eye, left
    # eye, nose tip and mouth centre; ``FaceKeyPoint(i).value == i``, so the
    # list is indexed directly.
    _NUM_KEYPOINTS = 4

    def __init__(self):
        self.mp_face_detection = mp.solutions.face_detection
        self.short_range_detector = self.mp_face_detection.FaceDetection(
            min_detection_confidence=0.5, model_selection=0
        )
        self.full_range_detector = self.mp_face_detection.FaceDetection(
            min_detection_confidence=0.5, model_selection=1
        )

    def __call__(self, filename):
        """Read the video at *filename* and detect landmarks in each frame.

        Args:
            filename: Path of the video, passed to
                ``torchvision.io.read_video``.

        Returns:
            A list with one entry per frame: an integer array of shape
            ``(4, 2)`` holding ``(x, y)`` pixel coordinates of the largest
            detected face, or ``None`` when no face was found in that frame.

        Raises:
            AssertionError: If neither detector finds a face in any frame.
        """
        video_frames = torchvision.io.read_video(filename, pts_unit='sec')[0].numpy()
        landmarks = self.detect(video_frames, self.full_range_detector)
        if all(frame_landmarks is None for frame_landmarks in landmarks):
            # The full-range model found nothing at all: retry the whole
            # video with the short-range model before giving up.
            landmarks = self.detect(video_frames, self.short_range_detector)
            assert any(
                frame_landmarks is not None for frame_landmarks in landmarks
            ), "Cannot detect any frames in the video"
        return landmarks

    def detect(self, video_frames, detector):
        """Run *detector* on each frame and keep the largest face's keypoints.

        Args:
            video_frames: Iterable of image arrays whose first two dimensions
                are height and width.
            detector: Object with a ``process(frame)`` method returning a
                result that exposes ``detections`` (a MediaPipe
                ``FaceDetection`` instance).

        Returns:
            A list with one entry per frame: an integer array of shape
            ``(4, 2)`` with ``(x, y)`` pixel coordinates, or ``None`` when the
            frame has no detection.
        """
        landmarks = []
        for frame in video_frames:
            results = detector.process(frame)
            if not results.detections:
                landmarks.append(None)
                continue
            ih, iw = frame.shape[:2]
            # Pick the largest face across *all* detections of this frame.
            # ``max`` returns the first one on ties.
            largest_face = max(
                results.detections,
                key=lambda detection: self._face_size(detection, iw, ih),
            )
            landmarks.append(np.array(self._keypoints(largest_face, iw, ih)))
        return landmarks

    @staticmethod
    def _face_size(detection, iw, ih):
        """Return bounding-box width + height of *detection* in pixels."""
        box = detection.location_data.relative_bounding_box
        # The relative box is (xmin, ymin, width, height), so the extent is
        # width/height themselves and must not be offset by xmin/ymin.
        return int(box.width * iw) + int(box.height * ih)

    def _keypoints(self, detection, iw, ih):
        """Return the kept keypoints of *detection* as ``[x, y]`` pixel pairs."""
        keypoints = detection.location_data.relative_keypoints
        return [
            [int(keypoints[i].x * iw), int(keypoints[i].y * ih)]
            for i in range(self._NUM_KEYPOINTS)
        ]