AhmedIbrahim007's picture
Upload 36 files
8f412ba verified
raw
history blame
6.11 kB
from scipy.io import loadmat
import pandas as pd
import numpy as np
from random import shuffle
import os
import cv2
class DataManager(object):
    """Load the fer2013 emotion, imdb gender or KDEF emotion dataset.

    Args:
        dataset_name: Name of the dataset: 'imdb', 'fer2013' or 'KDEF'.
        dataset_path: Location of the dataset. When None, a default path
            (relative to the working directory) is picked from
            ``dataset_name``.
        image_size: Two-element size the face images are resized to.
            NOTE(review): ``_load_fer2013`` hands it to ``cv2.resize`` as
            ``dsize`` while ``_load_KDEF`` unpacks it as (y_size, x_size);
            the two only agree for square sizes -- confirm the intended
            order before using a non-square size.

    Raises:
        ValueError: If ``dataset_path`` is None and ``dataset_name`` is not
            one of the supported names.
    """

    # Default dataset locations used when no explicit path is supplied.
    _DEFAULT_PATHS = {
        'imdb': '../datasets/imdb_crop/imdb.mat',
        'fer2013': '../datasets/fer2013/fer2013.csv',
        'KDEF': '../datasets/KDEF/',
    }

    def __init__(self, dataset_name='imdb',
                 dataset_path=None, image_size=(48, 48)):
        self.dataset_name = dataset_name
        self.dataset_path = dataset_path
        self.image_size = image_size
        if dataset_path is None:
            try:
                self.dataset_path = self._DEFAULT_PATHS[dataset_name]
            except (KeyError, TypeError):
                raise ValueError(
                    'Incorrect dataset name, please input '
                    'imdb, fer2013 or KDEF')

    def get_data(self):
        """Load and return the data of the configured dataset.

        Returns:
            For 'imdb', a dict mapping image path to gender class. For
            'fer2013' and 'KDEF', a ``(faces, emotions)`` tuple of arrays.

        Raises:
            ValueError: If ``dataset_name`` is not a supported dataset
                (possible when an explicit ``dataset_path`` was given, since
                the constructor only validates the name when it has to pick
                a default path).
        """
        if self.dataset_name == 'imdb':
            return self._load_imdb()
        if self.dataset_name == 'fer2013':
            return self._load_fer2013()
        if self.dataset_name == 'KDEF':
            return self._load_KDEF()
        raise ValueError(
            'Incorrect dataset name, please input imdb, fer2013 or KDEF')

    def _load_imdb(self):
        """Read the imdb ``.mat`` metadata file.

        Only entries are kept whose ``face_score`` exceeds the threshold,
        whose ``second_face_score`` is NaN and whose gender is not NaN.

        Returns:
            dict mapping each kept image path to its gender class.
        """
        face_score_threshold = 3
        dataset = loadmat(self.dataset_path)
        imdb = dataset['imdb']
        image_names_array = imdb['full_path'][0, 0][0]
        gender_classes = imdb['gender'][0, 0][0]
        face_score = imdb['face_score'][0, 0][0]
        second_face_score = imdb['second_face_score'][0, 0][0]

        face_score_mask = face_score > face_score_threshold
        second_face_score_mask = np.isnan(second_face_score)
        known_gender_mask = np.logical_not(np.isnan(gender_classes))
        mask = np.logical_and(face_score_mask, second_face_score_mask)
        mask = np.logical_and(mask, known_gender_mask)

        image_names_array = image_names_array[mask]
        gender_classes = gender_classes[mask].tolist()
        # Every entry wraps the path in a one-element container.
        image_names = [entry[0] for entry in image_names_array]
        return dict(zip(image_names, gender_classes))

    def _load_fer2013(self):
        """Read the fer2013 CSV file.

        Each row of the 'pixels' column is a space separated sequence of
        48 * 48 integer pixel values; the 'emotion' column is the label.

        Returns:
            Tuple ``(faces, emotions)``: ``faces`` is a float32 array with a
            trailing channel axis of size 1 and ``emotions`` is the one-hot
            encoding of the 'emotion' column.
        """
        data = pd.read_csv(self.dataset_path)
        width, height = 48, 48
        faces = []
        for pixel_sequence in data['pixels'].tolist():
            face = [int(pixel) for pixel in pixel_sequence.split(' ')]
            face = np.asarray(face).reshape(width, height)
            face = cv2.resize(face.astype('uint8'), self.image_size)
            faces.append(face.astype('float32'))
        faces = np.expand_dims(np.asarray(faces), -1)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
        # equivalent available in every pandas version. The cast keeps the
        # integer one-hot labels even where get_dummies defaults to bool.
        emotions = pd.get_dummies(data['emotion']).values.astype('uint8')
        return faces, emotions

    def _load_KDEF(self):
        """Read every ``.jpg`` image found below the KDEF directory.

        The emotion code is taken from characters 4-5 of the file name and
        mapped to a class index through ``get_class_to_arg``.

        Returns:
            Tuple ``(faces, emotions)``: ``faces`` has shape
            (num_faces, y_size, x_size, 1) and ``emotions`` has shape
            (num_faces, num_classes) with a one-hot row per image. Images
            whose emotion code is unknown keep an all-zero label row.
        """
        class_to_arg = get_class_to_arg(self.dataset_name)
        num_classes = len(class_to_arg)

        file_paths = []
        for folder, _subfolders, filenames in os.walk(self.dataset_path):
            for filename in filenames:
                if filename.lower().endswith('.jpg'):
                    file_paths.append(os.path.join(folder, filename))

        num_faces = len(file_paths)
        y_size, x_size = self.image_size
        faces = np.zeros(shape=(num_faces, y_size, x_size))
        emotions = np.zeros(shape=(num_faces, num_classes))
        for file_arg, file_path in enumerate(file_paths):
            image_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            # cv2.resize takes dsize as (width, height), which produces an
            # array of shape (y_size, x_size) matching the faces buffer.
            image_array = cv2.resize(image_array, (x_size, y_size))
            faces[file_arg] = image_array
            file_basename = os.path.basename(file_path)
            file_emotion = file_basename[4:6]
            # there are two file names in the dataset
            # that don't match the given classes
            try:
                emotion_arg = class_to_arg[file_emotion]
            except KeyError:
                continue
            emotions[file_arg, emotion_arg] = 1
        faces = np.expand_dims(faces, -1)
        return faces, emotions
def get_labels(dataset_name):
    """Return the class-index -> label mapping of a dataset.

    Args:
        dataset_name: 'fer2013', 'imdb' or 'KDEF'.

    Returns:
        dict whose keys are consecutive class indices starting at 0 and
        whose values are the label strings of the dataset.

    Raises:
        Exception: If ``dataset_name`` is none of the supported names.
    """
    if dataset_name == 'fer2013':
        ordered_labels = ['angry', 'disgust', 'fear', 'happy',
                          'sad', 'surprise', 'neutral']
    elif dataset_name == 'imdb':
        ordered_labels = ['woman', 'man']
    elif dataset_name == 'KDEF':
        ordered_labels = ['AN', 'DI', 'AF', 'HA', 'SA', 'SU', 'NE']
    else:
        raise Exception('Invalid dataset name')
    # The position in the list is the class index.
    return dict(enumerate(ordered_labels))
def get_class_to_arg(dataset_name='fer2013'):
    """Return the label -> class-index mapping of a dataset.

    Args:
        dataset_name: 'fer2013', 'imdb' or 'KDEF'.

    Returns:
        dict mapping every label string of the dataset to its class index
        (consecutive integers starting at 0).

    Raises:
        Exception: If ``dataset_name`` is none of the supported names.
    """
    if dataset_name == 'fer2013':
        class_names = ('angry', 'disgust', 'fear', 'happy', 'sad',
                       'surprise', 'neutral')
    elif dataset_name == 'imdb':
        class_names = ('woman', 'man')
    elif dataset_name == 'KDEF':
        class_names = ('AN', 'DI', 'AF', 'HA', 'SA', 'SU', 'NE')
    else:
        raise Exception('Invalid dataset name')
    # The position in the tuple is the class index.
    return {name: arg for arg, name in enumerate(class_names)}
def split_imdb_data(ground_truth_data, validation_split=.2, do_shuffle=False):
    """Split the keys of a ground-truth dict into train and validation keys.

    Args:
        ground_truth_data: dict whose (sortable) keys identify the samples.
        validation_split: Fraction of the keys assigned to validation.
        do_shuffle: When truthy, the sorted keys are shuffled (with the
            global ``random`` state) before splitting. Any falsy value
            (False, 0, None, ``numpy.False_``) keeps the sorted order.

    Returns:
        Tuple ``(train_keys, validation_keys)`` of lists.
    """
    ground_truth_keys = sorted(ground_truth_data.keys())
    # Truthiness instead of ``is not False``: the identity test shuffled for
    # every falsy value that is not the ``False`` singleton itself.
    if do_shuffle:
        shuffle(ground_truth_keys)
    training_split = 1 - validation_split
    num_train = int(training_split * len(ground_truth_keys))
    train_keys = ground_truth_keys[:num_train]
    validation_keys = ground_truth_keys[num_train:]
    return train_keys, validation_keys
def split_data(x, y, validation_split=.2):
    """Split samples and labels into a leading train and trailing val part.

    Args:
        x: Sliceable sequence of samples; its length fixes the split point.
        y: Sliceable sequence of labels, cut at the same index as ``x``.
        validation_split: Fraction of the samples assigned to validation.

    Returns:
        Tuple ``((train_x, train_y), (val_x, val_y))``.
    """
    # Index of the first validation sample (fraction truncated towards 0).
    boundary = int((1 - validation_split)*len(x))
    training_part = (x[:boundary], y[:boundary])
    validation_part = (x[boundary:], y[boundary:])
    return training_part, validation_part