HairFastGAN / utils /shape_predictor.py
JohannesBerends's picture
Upload folder using huggingface_hub
7f173a3 verified
import os
from pathlib import Path
import PIL
import dlib
import numpy as np
import scipy
import scipy.ndimage
import torch
from PIL import Image
from torchvision import transforms as T
from utils.drive import open_url
"""
brief: face alignment with FFHQ method (https://github.com/NVlabs/ffhq-dataset)
author: lzhbrian (https://lzhbrian.me)
date: 2020.1.5
note: code is heavily borrowed from
https://github.com/NVlabs/ffhq-dataset
http://dlib.net/face_landmark_detection.py.html
requirements:
apt install cmake
conda install Pillow numpy scipy
pip install dlib
# download face landmark model from:
# http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
"""
def get_landmark(filepath, predictor):
"""get landmark with dlib
:return: np.array shape=(68, 2)
"""
detector = dlib.get_frontal_face_detector()
img = dlib.load_rgb_image(filepath)
dets = detector(img, 1)
filepath = Path(filepath)
print(f"{filepath.name}: Number of faces detected: {len(dets)}")
shapes = [predictor(img, d) for k, d in enumerate(dets)]
lms = [np.array([[tt.x, tt.y] for tt in shape.parts()]) for shape in shapes]
return lms
def get_landmark_from_tensors(tensors: list[torch.Tensor | Image.Image | np.ndarray], predictor):
detector = dlib.get_frontal_face_detector()
transform = T.ToPILImage()
images = []
lms = []
for k, tensor in enumerate(tensors):
if isinstance(tensor, torch.Tensor):
img_pil = transform(tensor)
else:
img_pil = tensor
img = np.array(img_pil)
images.append(img_pil)
dets = detector(img, 1)
if len(dets) == 0:
raise ValueError(f"No faces detected in the image {k}.")
elif len(dets) == 1:
print(f"Number of faces detected: {len(dets)}")
else:
print(f"Number of faces detected: {len(dets)}, get largest face")
# Find the largest face
dets = sorted(dets, key=lambda det: det.width() * det.height(), reverse=True)
shape = predictor(img, dets[0])
lm = np.array([[tt.x, tt.y] for tt in shape.parts()])
lms.append(lm)
return images, lms
def align_face(data, predictor=None, is_filepath=False, return_tensors=True):
"""
:param data: filepath or list torch Tensors
:return: list of PIL Images
"""
if predictor is None:
predictor_path = 'shape_predictor_68_face_landmarks.dat'
if not os.path.isfile(predictor_path):
print("Downloading Shape Predictor")
data_io = open_url("https://drive.google.com/uc?id=1huhv8PYpNNKbGCLOaYUjOgR1pY5pmbJx")
with open(predictor_path, 'wb') as f:
f.write(data_io.getbuffer())
predictor = dlib.shape_predictor(predictor_path)
if is_filepath:
lms = get_landmark(data, predictor)
else:
if not isinstance(data, list):
data = [data]
images, lms = get_landmark_from_tensors(data, predictor)
imgs = []
for num_img, lm in enumerate(lms):
lm_chin = lm[0: 17] # left-right
lm_eyebrow_left = lm[17: 22] # left-right
lm_eyebrow_right = lm[22: 27] # left-right
lm_nose = lm[27: 31] # top-down
lm_nostrils = lm[31: 36] # top-down
lm_eye_left = lm[36: 42] # left-clockwise
lm_eye_right = lm[42: 48] # left-clockwise
lm_mouth_outer = lm[48: 60] # left-clockwise
lm_mouth_inner = lm[60: 68] # left-clockwise
# Calculate auxiliary vectors.
eye_left = np.mean(lm_eye_left, axis=0)
eye_right = np.mean(lm_eye_right, axis=0)
eye_avg = (eye_left + eye_right) * 0.5
eye_to_eye = eye_right - eye_left
mouth_left = lm_mouth_outer[0]
mouth_right = lm_mouth_outer[6]
mouth_avg = (mouth_left + mouth_right) * 0.5
eye_to_mouth = mouth_avg - eye_avg
# Choose oriented crop rectangle.
x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
x /= np.hypot(*x)
x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
y = np.flipud(x) * [-1, 1]
c = eye_avg + eye_to_mouth * 0.1
quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
qsize = np.hypot(*x) * 2
# read image
if is_filepath:
img = PIL.Image.open(data)
else:
img = images[num_img]
output_size = 1024
# output_size = 256
transform_size = 4096
enable_padding = True
# Shrink.
shrink = int(np.floor(qsize / output_size * 0.5))
if shrink > 1:
rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
img = img.resize(rsize, PIL.Image.ANTIALIAS)
quad /= shrink
qsize /= shrink
# Crop.
border = max(int(np.rint(qsize * 0.1)), 3)
crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
int(np.ceil(max(quad[:, 1]))))
crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]),
min(crop[3] + border, img.size[1]))
if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
img = img.crop(crop)
quad -= crop[0:2]
# Pad.
pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
int(np.ceil(max(quad[:, 1]))))
pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0),
max(pad[3] - img.size[1] + border, 0))
if enable_padding and max(pad) > border - 4:
pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
h, w, _ = img.shape
y, x, _ = np.ogrid[:h, :w, :1]
mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]),
1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3]))
blur = qsize * 0.02
img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')
quad += pad[:2]
# Transform.
img = img.transform((transform_size, transform_size), PIL.Image.QUAD, (quad + 0.5).flatten(),
PIL.Image.BILINEAR)
if output_size < transform_size:
img = img.resize((output_size, output_size), PIL.Image.LANCZOS)
# Save aligned image.
imgs.append(img)
if return_tensors:
transform = T.ToTensor()
tensors = [transform(img).clamp(0, 1) for img in imgs]
return tensors
return imgs