Spaces:
Running
on
Zero
Running
on
Zero
#!/usr/bin/env python | |
from __future__ import annotations | |
import os | |
import pathlib | |
import sys | |
import cv2 | |
import gradio as gr | |
import numpy as np | |
import torch | |
sys.path.insert(0, "face_detection") | |
sys.path.insert(0, "face_alignment") | |
sys.path.insert(0, "emotion_recognition") | |
from ibug.emotion_recognition import EmoNetPredictor | |
from ibug.face_alignment import FANPredictor | |
from ibug.face_detection import RetinaFacePredictor | |
# Markdown heading rendered at the top of the demo page.
DESCRIPTION = "# [ibug-group/emotion_recognition](https://github.com/ibug-group/emotion_recognition)"

# Use the first CUDA device when PyTorch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Face detector and landmark detector shared by every request.
face_detector = RetinaFacePredictor(
    threshold=0.8,
    device=device,
    model=RetinaFacePredictor.get_model("mobilenet0.25"),
)
landmark_detector = FANPredictor(
    device=device,
    model=FANPredictor.get_model("2dfan2"),
)

# Names of the EmoNet variants offered in the UI; the first one is the default.
model_names = [
    "emonet248",
    "emonet245",
    "emonet248_alt",
    "emonet245_alt",
]

# Every variant is instantiated once at import time and looked up by name.
models = {
    name: EmoNetPredictor(
        device=device,
        model=EmoNetPredictor.get_model(name),
    )
    for name in model_names
}
def predict(image: np.ndarray, model_name: str, max_num_faces: int) -> np.ndarray:
    """Annotate the faces found in an RGB image with their predicted emotion.

    Each kept face gets a green bounding box and a text label of the form
    ``"<Emotion> (<valence>, <arousal>)"`` drawn just above the box, coloured
    according to the predicted emotion class.

    Args:
        image: Input image as delivered by the Gradio image component
            (channels are reversed to BGR before being passed to the detectors).
        model_name: Key into the module-level ``models`` dict.
        max_num_faces: Upper bound on the number of faces to annotate. The
            value is coerced to ``int`` and clamped to at least 1.

    Returns:
        A copy of the input image with the annotations drawn, in the same
        channel order as the input.

    Raises:
        gr.Error: If no image was supplied or no face was detected.
    """
    if image is None:
        # Clicking "Run" without an input passes None; report it to the user
        # instead of failing with a TypeError on the slicing below.
        raise gr.Error("Please provide an input image.")

    model = models[model_name]
    # One colour per emotion class; the table depends on whether the model
    # predicts 8 classes or fewer.
    if len(model.config.emotion_labels) == 8:
        colors: tuple[tuple[int, int, int], ...] = (
            (192, 192, 192),
            (0, 255, 0),
            (255, 0, 0),
            (0, 255, 255),
            (0, 128, 255),
            (255, 0, 128),
            (0, 0, 255),
            (128, 255, 0),
        )
    else:
        colors = (
            (192, 192, 192),
            (0, 255, 0),
            (255, 0, 0),
            (0, 255, 255),
            (0, 0, 255),
        )

    # RGB -> BGR
    image = image[:, :, ::-1]

    faces = face_detector(image, rgb=False)
    if len(faces) == 0:
        raise gr.Error("No face was found.")

    # The slider value may arrive as a float, which is not a valid slice
    # bound; values below 1 would otherwise drop every face.
    face_limit = max(1, int(max_num_faces))
    # Sort by column 4 in descending order (presumably the detection score --
    # confirm against RetinaFacePredictor) and keep the top entries.
    faces = sorted(faces, key=lambda x: -x[4])[:face_limit]
    faces = np.asarray(faces)

    _, _, features = landmark_detector(image, faces, rgb=False, return_features=True)
    emotions = model(features)

    # Draw on a copy so the (reversed view of the) input is left untouched.
    res = image.copy()
    for index, face in enumerate(faces):
        # OpenCV drawing functions expect plain Python ints for coordinates.
        left, top, right, bottom = (int(v) for v in np.round(face[:4]))
        cv2.rectangle(res, (left, top), (right, bottom), (0, 255, 0), 2)

        emotion = int(emotions["emotion"][index])
        valence = emotions["valence"][index]
        arousal = emotions["arousal"][index]
        emotion_label = model.config.emotion_labels[emotion].title()

        text_content = f"{emotion_label} ({valence: .01f}, {arousal: .01f})"
        cv2.putText(
            res,
            text_content,
            (left, top - 10),
            cv2.FONT_HERSHEY_DUPLEX,
            1,
            colors[emotion],
            lineType=cv2.LINE_AA,
        )

    # BGR -> RGB
    return res[:, :, ::-1]
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Left column: inputs and the run button.
        with gr.Column():
            image = gr.Image(label="Input", type="numpy")
            model_name = gr.Radio(
                label="Model",
                choices=model_names,
                value=model_names[0],
                type="value",
            )
            max_num_of_faces = gr.Slider(
                label="Max Number of Faces",
                minimum=1,
                maximum=30,
                step=1,
                value=30,
            )
            run_button = gr.Button()
        # Right column: annotated result.
        with gr.Column():
            result = gr.Image(label="Output")

    # One example row per JPEG found (recursively) under ./images, each using
    # the default model and the maximum face count.
    example_images = sorted(pathlib.Path("images").rglob("*.jpg"))
    gr.Examples(
        examples=[[path.as_posix(), model_names[0], 30] for path in example_images],
        inputs=[image, model_name, max_num_of_faces],
        outputs=result,
        fn=predict,
        # Example outputs are cached only when CACHE_EXAMPLES is set to "1".
        cache_examples=os.environ.get("CACHE_EXAMPLES") == "1",
    )

    run_button.click(
        fn=predict,
        inputs=[image, model_name, max_num_of_faces],
        outputs=result,
        api_name="predict",
    )
if __name__ == "__main__":
    # Enable request queuing (max_size=20) before starting the server.
    demo.queue(max_size=20)
    demo.launch()