"""Webcam head pose estimation demo using 6DRepNet with RetinaFace face detection."""

import argparse
import time

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
from torchvision import transforms

import utils
from face_detection import RetinaFace
from model import SixDRepNet


def parse_args():
    """Parse input arguments."""
    parser = argparse.ArgumentParser(
        description='Head pose estimation using the 6DRepNet.')
    parser.add_argument('--gpu', dest='gpu_id',
                        help='GPU device id to use [0], set -1 for CPU',
                        default=0, type=int)
    parser.add_argument('--cam', dest='cam_id',
                        help='Camera device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--snapshot', dest='snapshot',
                        help='Name of model snapshot.',
                        default='', type=str)
    parser.add_argument('--save_viz', dest='save_viz',
                        help='Save images with pose cube.',
                        default=False, type=bool)
    return parser.parse_args()


# Preprocessing applied to each cropped face before it is fed to the network.
transformations = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])])


if __name__ == '__main__':
    args = parse_args()
    cudnn.enabled = True
    gpu = args.gpu_id
    cam = args.cam_id
    snapshot_path = args.snapshot

    model = SixDRepNet(backbone_name='RepVGG-A0',
                       backbone_file='',
                       deploy=True,
                       pretrained=False)

    print('Loading data.')
    detector = RetinaFace(gpu_id=gpu)

    # Load snapshot
    saved_state_dict = torch.load(snapshot_path, map_location='cpu')
    if 'model_state_dict' in saved_state_dict:
        model.load_state_dict(saved_state_dict['model_state_dict'])
    else:
        model.load_state_dict(saved_state_dict)

    if gpu != -1:
        model.cuda(gpu)

    # Test the Model
    model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).

    cap = cv2.VideoCapture(cam)

    # Check if the webcam is opened correctly
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    with torch.no_grad():
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            faces = detector(frame)

            for box, landmarks, score in faces:
                # Skip low-confidence detections.
                if score < .95:
                    continue

                x_min = int(box[0])
                y_min = int(box[1])
                x_max = int(box[2])
                y_max = int(box[3])
                bbox_width = abs(x_max - x_min)
                bbox_height = abs(y_max - y_min)

                # Expand the box by 20% so the whole head is inside the crop.
                x_min = max(0, x_min - int(0.2 * bbox_height))
                y_min = max(0, y_min - int(0.2 * bbox_width))
                x_max = x_max + int(0.2 * bbox_height)
                y_max = y_max + int(0.2 * bbox_width)

                img = frame[y_min:y_max, x_min:x_max]
                img = Image.fromarray(img)
                img = img.convert('RGB')
                img = transformations(img)
                img = img.unsqueeze(0)

                if gpu != -1:
                    img = img.cuda(gpu)

                start = time.time()
                R_pred = model(img)
                end = time.time()
                print('Head pose estimation: %.2f ms' % ((end - start) * 1000.))

                # Convert the predicted rotation matrix to Euler angles in degrees.
                euler = utils.compute_euler_angles_from_rotation_matrices(
                    R_pred, use_gpu=False) * 180 / np.pi
                p_pred_deg = euler[:, 0].cpu()
                y_pred_deg = euler[:, 1].cpu()
                r_pred_deg = euler[:, 2].cpu()

                # utils.draw_axis(frame, y_pred_deg, p_pred_deg, r_pred_deg,
                #                 left + int(.5 * (right - left)), top, size=100)
                utils.plot_pose_cube(frame, y_pred_deg, p_pred_deg, r_pred_deg,
                                     x_min + int(.5 * (x_max - x_min)),
                                     y_min + int(.5 * (y_max - y_min)),
                                     size=bbox_width)

            cv2.imshow("Demo", frame)
            # Exit on ESC.
            if cv2.waitKey(5) & 0xFF == 27:
                break

    cap.release()
    cv2.destroyAllWindows()