|
from sklearn.decomposition import PCA, IncrementalPCA |
|
from sklearn.decomposition import TruncatedSVD |
|
import numpy as np |
|
import pickle |
|
import os |
|
from tqdm import tqdm |
|
from numpy import save, load |
|
import math |
|
from PIL import Image |
|
from numpy import save, load |
|
|
|
|
|
class PCAUtility:
    """Compute, persist and apply PCA over label vectors stored as .npy files.

    Eigen-decomposition artifacts are stored under ./pca_obj/ as three .npy
    files per (dataset, percentage) pair, named with the prefixes below.
    """

    eigenvalues_prefix = "_eigenvalues_"
    eigenvectors_prefix = "_eigenvectors_"
    meanvector_prefix = "_meanvector_"

    def create_pca_from_npy(self, dataset_name, labels_npy_path, pca_percentages):
        """Generate and save eigenvalues, eigenvectors and the mean vector.

        :param dataset_name: name used as the filename stem for the saved artifacts.
        :param labels_npy_path: directory containing the normalized labels saved in .npy format.
        :param pca_percentages: percentage of variance (0-100) to retain; also part of the filename.
        :return: None; writes three .npy files under ./pca_obj/.
        """
        path = labels_npy_path
        print('PCA calculation started: loading labels')

        lbl_arr = []
        for file in tqdm(os.listdir(path)):
            if file.endswith(".npy"):
                npy_file = os.path.join(path, file)
                lbl_arr.append(load(npy_file))

        lbl_arr = np.array(lbl_arr)

        # The reduced/transformed data is not needed here; only the decomposition is kept.
        _, eigenvalues, eigenvectors = self._func_PCA(lbl_arr, pca_percentages)
        mean_lbl_arr = np.mean(lbl_arr, axis=0)
        # Transpose so each eigenvector is a column (matches calculate_b_vector's use).
        eigenvectors = eigenvectors.T

        # np.save raises FileNotFoundError if the target directory is missing.
        os.makedirs('./pca_obj', exist_ok=True)
        save('./pca_obj/' + dataset_name + self.eigenvalues_prefix + str(pca_percentages), eigenvalues)
        save('./pca_obj/' + dataset_name + self.eigenvectors_prefix + str(pca_percentages), eigenvectors)
        save('./pca_obj/' + dataset_name + self.meanvector_prefix + str(pca_percentages), mean_lbl_arr)

    def load_pca_obj(self, dataset_name, pca_percentages):
        """Load the eigenvalues, eigenvectors and mean vector saved by create_pca_from_npy.

        :param dataset_name: filename stem used at save time.
        :param pca_percentages: percentage value used at save time.
        :return: tuple (eigenvalues, eigenvectors, meanvector) as numpy arrays.
        """
        # np.save appends '.npy' to the path it is given, so it must be
        # appended here as well or np.load raises FileNotFoundError.
        eigenvalues = np.load('./pca_obj/' + dataset_name + self.eigenvalues_prefix + str(pca_percentages) + '.npy')
        eigenvectors = np.load('./pca_obj/' + dataset_name + self.eigenvectors_prefix + str(pca_percentages) + '.npy')
        meanvector = np.load('./pca_obj/' + dataset_name + self.meanvector_prefix + str(pca_percentages) + '.npy')
        return eigenvalues, eigenvectors, meanvector

    def calculate_b_vector(self, predicted_vector, correction, eigenvalues, eigenvectors, meanvector):
        """Project a prediction into PCA space: b = Pᵀ (x - mean).

        :param predicted_vector: vector in the original label space.
        :param correction: when truthy, clamp each b_i into [-3*sqrt(lambda_i), 3*sqrt(lambda_i)]
            (the conventional +/-3 std-dev plausibility bound of a point distribution model).
        :param eigenvalues: per-component variances (lambda_i).
        :param eigenvectors: matrix whose columns are the eigenvectors.
        :param meanvector: mean of the training label vectors.
        :return: the (possibly clamped) b vector.
        """
        tmp1 = predicted_vector - meanvector
        b_vector = np.dot(eigenvectors.T, tmp1)

        if correction:
            for i, b_item in enumerate(b_vector):
                # Clamp limit is 3 standard deviations: 3 * sqrt(variance).
                limit = 3 * math.sqrt(eigenvalues[i])
                if b_item > 0:
                    b_item = min(b_item, limit)
                else:
                    b_item = max(b_item, -1 * limit)
                b_vector[i] = b_item

        return b_vector

    def _func_PCA(self, input_data, pca_postfix):
        """Fit sklearn PCA retaining pca_postfix% of the variance.

        :param input_data: 2-D array-like, one sample per row.
        :param pca_postfix: percentage (0-100); divided by 100 so sklearn treats
            it as a variance fraction when selecting n_components.
        :return: tuple (transformed_data, eigenvalues, eigenvectors) where
            eigenvectors has one component per row (sklearn's components_).
        """
        input_data = np.array(input_data)
        pca = PCA(n_components=pca_postfix / 100)
        pca.fit(input_data)
        pca_input_data = pca.transform(input_data)
        eigenvalues = pca.explained_variance_
        eigenvectors = pca.components_
        return pca_input_data, eigenvalues, eigenvectors
|
|