"""
File: app_utils.py
Author: Elena Ryumina and Dmitry Ryumin
Description: This module contains utility functions for the facial expression recognition application.
License: MIT License
"""
import torch
import numpy as np
import mediapipe as mp
from PIL import Image
import cv2
from pytorch_grad_cam.utils.image import show_cam_on_image
# Importing necessary components for the Gradio app
from app.model import pth_model_static, pth_model_dynamic, cam, pth_processing
from app.face_utils import get_box, display_info
from app.config import DICT_EMO, config_data
from app.plot import statistics_plot

mp_face_mesh = mp.solutions.face_mesh


def preprocess_image_and_predict(inp):
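    """Detect a face in a single image and predict its emotion.

    Returns the face crop, a Grad-CAM heatmap of the static model's
    attention, and a dict mapping the seven emotion labels to their
    probabilities; returns (None, None, None) if no face is found.
    """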
    if inp is None:
        return None, None, None
    inp = np.array(inp)
try:
h, w = inp.shape[:2]
except Exception:
return None, None, None
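    # Detect a single face with MediaPipe Face Mesh (expects an RGB image)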
with mp_face_mesh.FaceMesh(
max_num_faces=1,
refine_landmarks=False,
min_detection_confidence=0.5,
min_tracking_confidence=0.5,
) as face_mesh:
results = face_mesh.process(inp)
if results.multi_face_landmarks:
for fl in results.multi_face_landmarks:
startX, startY, endX, endY = get_box(fl, w, h)
cur_face = inp[startY:endY, startX:endX]
cur_face_n = pth_processing(Image.fromarray(cur_face))
with torch.no_grad():
prediction = (
torch.nn.functional.softmax(pth_model_static(cur_face_n), dim=1)
.detach()
.numpy()[0]
)
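                # Per-class probabilities for the seven emotion labels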
confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)}
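                # Grad-CAM heatmap showing which facial regions drove the prediction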
grayscale_cam = cam(input_tensor=cur_face_n)
grayscale_cam = grayscale_cam[0, :]
                cur_face_hm = cv2.resize(cur_face, (224, 224))
cur_face_hm = np.float32(cur_face_hm) / 255
heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True)
return cur_face, heatmap, confidences
else:
return None, None, None


def preprocess_video_and_predict(video):
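    """Run frame-by-frame emotion recognition on a video file.

    Detects a face in each frame with MediaPipe, extracts features with the
    static model, classifies a sliding window of features with the dynamic
    (temporal) model, and writes annotated face-crop and Grad-CAM heatmap
    videos to disk. Returns the source video path, both output video paths,
    and a statistics plot of the per-frame probabilities.
    """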
cap = cv2.VideoCapture(video)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = np.round(cap.get(cv2.CAP_PROP_FPS))
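    # Output videos: the 224x224 face crop and its Grad-CAM heatmap overlay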
path_save_video_face = 'result_face.mp4'
vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
path_save_video_hm = 'result_hm.mp4'
vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
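    # Sliding window of per-frame features consumed by the dynamic model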
lstm_features = []
count_frame = 1
count_face = 0
probs = []
frames = []
last_output = None
last_heatmap = None
cur_face = None
with mp_face_mesh.FaceMesh(
max_num_faces=1,
refine_landmarks=False,
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as face_mesh:
while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
frame_copy = frame.copy()
frame_copy.flags.writeable = False
frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
results = face_mesh.process(frame_copy)
frame_copy.flags.writeable = True
if results.multi_face_landmarks:
for fl in results.multi_face_landmarks:
startX, startY, endX, endY = get_box(fl, w, h)
                    cur_face = frame_copy[startY:endY, startX:endX]
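                    # Run the models only on every FRAME_DOWNSAMPLING-th face frame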
                    if count_face % config_data.FRAME_DOWNSAMPLING == 0:
cur_face_copy = pth_processing(Image.fromarray(cur_face))
with torch.no_grad():
features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy()
grayscale_cam = cam(input_tensor=cur_face_copy)
grayscale_cam = grayscale_cam[0, :]
                        cur_face_hm = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
cur_face_hm = np.float32(cur_face_hm) / 255
heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=False)
last_heatmap = heatmap
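                        # First detection seeds the 10-step feature window;
                        # afterwards the window slides forward one step per update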
if len(lstm_features) == 0:
lstm_features = [features]*10
else:
lstm_features = lstm_features[1:] + [features]
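                        # Stack the window into a (1, 10, feature_dim) batch for the temporal model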
lstm_f = torch.from_numpy(np.vstack(lstm_features))
lstm_f = torch.unsqueeze(lstm_f, 0)
with torch.no_grad():
output = pth_model_dynamic(lstm_f).detach().numpy()
last_output = output
if count_face == 0:
count_face += 1
                    else:
                        # Skipped frame: reuse the last prediction and heatmap,
                        # or log NaNs if nothing has been predicted yet
                        if last_output is not None:
                            output = last_output
                            heatmap = last_heatmap
                        else:
                            output = np.full((1, 7), np.nan)
probs.append(output[0])
frames.append(count_frame)
            else:
                # No face detected: reset the temporal feature window and
                # record NaN probabilities for this frame
                if last_output is not None:
                    lstm_features = []
                    empty = np.full(7, np.nan)
                    probs.append(empty)
                    frames.append(count_frame)
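            # Stamp the frame number on the crop and heatmap and write them out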
if cur_face is not None:
heatmap_f = display_info(heatmap, 'Frame: {}'.format(count_frame), box_scale=.3)
cur_face = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)
                cur_face = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
cur_face = display_info(cur_face, 'Frame: {}'.format(count_frame), box_scale=.3)
vid_writer_face.write(cur_face)
vid_writer_hm.write(heatmap_f)
count_frame += 1
if count_face != 0:
count_face += 1
vid_writer_face.release()
vid_writer_hm.release()
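    # Plot the per-frame emotion probabilities for display in the app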
stat = statistics_plot(frames, probs)
if not stat:
return None, None, None, None
return video, path_save_video_face, path_save_video_hm, stat |