Spaces:
Runtime error
Runtime error
File size: 6,106 Bytes
8f412ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
from scipy.io import loadmat
import pandas as pd
import numpy as np
from random import shuffle
import os
import cv2
class DataManager(object):
"""Class for loading fer2013 emotion classification dataset or
imdb gender classification dataset."""
def __init__(self, dataset_name='imdb',
dataset_path=None, image_size=(48, 48)):
self.dataset_name = dataset_name
self.dataset_path = dataset_path
self.image_size = image_size
if self.dataset_path is not None:
self.dataset_path = dataset_path
elif self.dataset_name == 'imdb':
self.dataset_path = '../datasets/imdb_crop/imdb.mat'
elif self.dataset_name == 'fer2013':
self.dataset_path = '../datasets/fer2013/fer2013.csv'
elif self.dataset_name == 'KDEF':
self.dataset_path = '../datasets/KDEF/'
else:
raise Exception(
'Incorrect dataset name, please input imdb or fer2013')
def get_data(self):
if self.dataset_name == 'imdb':
ground_truth_data = self._load_imdb()
elif self.dataset_name == 'fer2013':
ground_truth_data = self._load_fer2013()
elif self.dataset_name == 'KDEF':
ground_truth_data = self._load_KDEF()
return ground_truth_data
def _load_imdb(self):
face_score_treshold = 3
dataset = loadmat(self.dataset_path)
image_names_array = dataset['imdb']['full_path'][0, 0][0]
gender_classes = dataset['imdb']['gender'][0, 0][0]
face_score = dataset['imdb']['face_score'][0, 0][0]
second_face_score = dataset['imdb']['second_face_score'][0, 0][0]
face_score_mask = face_score > face_score_treshold
second_face_score_mask = np.isnan(second_face_score)
unknown_gender_mask = np.logical_not(np.isnan(gender_classes))
mask = np.logical_and(face_score_mask, second_face_score_mask)
mask = np.logical_and(mask, unknown_gender_mask)
image_names_array = image_names_array[mask]
gender_classes = gender_classes[mask].tolist()
image_names = []
for image_name_arg in range(image_names_array.shape[0]):
image_name = image_names_array[image_name_arg][0]
image_names.append(image_name)
return dict(zip(image_names, gender_classes))
def _load_fer2013(self):
data = pd.read_csv(self.dataset_path)
pixels = data['pixels'].tolist()
width, height = 48, 48
faces = []
for pixel_sequence in pixels:
face = [int(pixel) for pixel in pixel_sequence.split(' ')]
face = np.asarray(face).reshape(width, height)
face = cv2.resize(face.astype('uint8'), self.image_size)
faces.append(face.astype('float32'))
faces = np.asarray(faces)
faces = np.expand_dims(faces, -1)
emotions = pd.get_dummies(data['emotion']).as_matrix()
return faces, emotions
def _load_KDEF(self):
class_to_arg = get_class_to_arg(self.dataset_name)
num_classes = len(class_to_arg)
file_paths = []
for folder, subfolders, filenames in os.walk(self.dataset_path):
for filename in filenames:
if filename.lower().endswith(('.jpg')):
file_paths.append(os.path.join(folder, filename))
num_faces = len(file_paths)
y_size, x_size = self.image_size
faces = np.zeros(shape=(num_faces, y_size, x_size))
emotions = np.zeros(shape=(num_faces, num_classes))
for file_arg, file_path in enumerate(file_paths):
image_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
image_array = cv2.resize(image_array, (y_size, x_size))
faces[file_arg] = image_array
file_basename = os.path.basename(file_path)
file_emotion = file_basename[4:6]
# there are two file names in the dataset
# that don't match the given classes
try:
emotion_arg = class_to_arg[file_emotion]
except:
continue
emotions[file_arg, emotion_arg] = 1
faces = np.expand_dims(faces, -1)
return faces, emotions
def get_labels(dataset_name):
if dataset_name == 'fer2013':
return {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
4: 'sad', 5: 'surprise', 6: 'neutral'}
elif dataset_name == 'imdb':
return {0: 'woman', 1: 'man'}
elif dataset_name == 'KDEF':
return {0: 'AN', 1: 'DI', 2: 'AF', 3: 'HA', 4: 'SA', 5: 'SU', 6: 'NE'}
else:
raise Exception('Invalid dataset name')
def get_class_to_arg(dataset_name='fer2013'):
if dataset_name == 'fer2013':
return {'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'sad': 4,
'surprise': 5, 'neutral': 6}
elif dataset_name == 'imdb':
return {'woman': 0, 'man': 1}
elif dataset_name == 'KDEF':
return {'AN': 0, 'DI': 1, 'AF': 2, 'HA': 3, 'SA': 4, 'SU': 5, 'NE': 6}
else:
raise Exception('Invalid dataset name')
def split_imdb_data(ground_truth_data, validation_split=.2, do_shuffle=False):
ground_truth_keys = sorted(ground_truth_data.keys())
if do_shuffle is not False:
shuffle(ground_truth_keys)
training_split = 1 - validation_split
num_train = int(training_split * len(ground_truth_keys))
train_keys = ground_truth_keys[:num_train]
validation_keys = ground_truth_keys[num_train:]
return train_keys, validation_keys
def split_data(x, y, validation_split=.2):
num_samples = len(x)
num_train_samples = int((1 - validation_split)*num_samples)
train_x = x[:num_train_samples]
train_y = y[:num_train_samples]
val_x = x[num_train_samples:]
val_y = y[num_train_samples:]
train_data = (train_x, train_y)
val_data = (val_x, val_y)
return train_data, val_data
|