#!/usr/bin/env python
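"""Gradio demo for ibug-group/emotion_recognition: detects faces in an image,
then predicts a discrete emotion plus valence/arousal for each face."""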
from __future__ import annotations

import pathlib
import sys

import cv2
import gradio as gr
import numpy as np
import spaces
import torch
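
# The ibug packages are bundled in local subdirectories; put them on sys.path
# so the imports below resolve.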
sys.path.insert(0, "face_detection")
sys.path.insert(0, "face_alignment")
sys.path.insert(0, "emotion_recognition")

from ibug.emotion_recognition import EmoNetPredictor
from ibug.face_alignment import FANPredictor
from ibug.face_detection import RetinaFacePredictor

DESCRIPTION = "# [ibug-group/emotion_recognition](https://github.com/ibug-group/emotion_recognition)"
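
# Build each predictor on CPU first, then move it explicitly to the selected device.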
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

face_detector = RetinaFacePredictor(
    threshold=0.8, device="cpu", model=RetinaFacePredictor.get_model("mobilenet0.25")
)
face_detector.device = device
face_detector.net.to(device)

landmark_detector = FANPredictor(
    device="cpu", model=FANPredictor.get_model("2dfan2"), config=FANPredictor.create_config(use_jit=False)
)
landmark_detector.device = device
landmark_detector.net.to(device)


def load_model(model_name: str, device: torch.device) -> EmoNetPredictor:
    """Load an EmoNet checkpoint on CPU, then move it to the given device."""
    model = EmoNetPredictor(
        device="cpu", model=EmoNetPredictor.get_model(model_name), config=EmoNetPredictor.create_config(use_jit=False)
    )
    model.device = device
    model.net.to(device)
    return model
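

# Available EmoNet checkpoints. The number of emotion classes varies per
# checkpoint; predict() reads it from model.config.emotion_labels at run time.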
model_names = [
    "emonet248",
    "emonet245",
    "emonet248_alt",
    "emonet245_alt",
]
models = {name: load_model(name, device) for name in model_names}
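

# `import spaces` only takes effect if the inference function is decorated for
# ZeroGPU; assuming the standard `@spaces.GPU` decorator from the spaces package.
@spaces.GPU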
def predict(image: np.ndarray, model_name: str, max_num_faces: int) -> np.ndarray:
    model = models[model_name]
    # One BGR colour per emotion label; 8-class and 5-class models use
    # different palettes.
    if len(model.config.emotion_labels) == 8:
        colors: tuple[tuple[int, int, int], ...] = (
            (192, 192, 192),
            (0, 255, 0),
            (255, 0, 0),
            (0, 255, 255),
            (0, 128, 255),
            (255, 0, 128),
            (0, 0, 255),
            (128, 255, 0),
        )
    else:
        colors = (
            (192, 192, 192),
            (0, 255, 0),
            (255, 0, 0),
            (0, 255, 255),
            (0, 0, 255),
        )

    # Gradio delivers RGB; the detectors expect BGR.
    image = image[:, :, ::-1]

    faces = face_detector(image, rgb=False)
    if len(faces) == 0:
        raise gr.Error("No face was found.")
    # Keep the max_num_faces highest-confidence detections (detection score at index 4).
    faces = sorted(list(faces), key=lambda x: -x[4])[:max_num_faces]
    faces = np.asarray(faces)

    # The landmark detector also returns the intermediate features that the
    # emotion model consumes.
    _, _, features = landmark_detector(image, faces, rgb=False, return_features=True)
    emotions = model(features)

    res = image.copy()
    for index, face in enumerate(faces):
        box = np.round(face[:4]).astype(int)
        cv2.rectangle(res, tuple(box[:2]), tuple(box[2:]), (0, 255, 0), 2)

        emotion = emotions["emotion"][index]
        valence = emotions["valence"][index]
        arousal = emotions["arousal"][index]
        emotion_label = model.config.emotion_labels[emotion].title()
        text_content = f"{emotion_label} ({valence: .01f}, {arousal: .01f})"
        cv2.putText(
            res, text_content, (box[0], box[1] - 10), cv2.FONT_HERSHEY_DUPLEX, 1, colors[emotion], lineType=cv2.LINE_AA
        )
    # BGR -> RGB for display.
    return res[:, :, ::-1]


with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Input", type="numpy")
            model_name = gr.Radio(
                label="Model",
                choices=model_names,
                value=model_names[0],
                type="value",
            )
            max_num_of_faces = gr.Slider(
                label="Max Number of Faces",
                minimum=1,
                maximum=30,
                step=1,
                value=30,
            )
            run_button = gr.Button()
        with gr.Column():
            result = gr.Image(label="Output")
    gr.Examples(
        examples=[[path.as_posix(), model_names[0], 30] for path in sorted(pathlib.Path("images").rglob("*.jpg"))],
        inputs=[image, model_name, max_num_of_faces],
        outputs=result,
        fn=predict,
    )

    run_button.click(
        fn=predict,
        inputs=[image, model_name, max_num_of_faces],
        outputs=result,
        api_name="predict",
    )

if __name__ == "__main__":
    demo.queue(max_size=20).launch()
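
# A minimal client-side sketch of the published "/predict" endpoint (assumes the
# gradio_client package; "<space-id>" is a placeholder for this Space's id):
#
#   from gradio_client import Client, handle_file
#
#   client = Client("<space-id>")
#   annotated = client.predict(handle_file("face.jpg"), "emonet248", 30, api_name="/predict")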