File size: 3,007 Bytes
ad1e0a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.decomposition import TruncatedSVD
import numpy as np
import pickle
import os
from tqdm import tqdm
from numpy import save, load
import math
from PIL import Image
from numpy import save, load
class PCAUtility:
    """Compute, persist and apply a PCA model of label vectors.

    The model consists of three arrays (eigenvalues, eigenvectors and the
    mean vector) that are stored as ``.npy`` files under ``pca_obj_dir``.
    """

    # File-name fragments placed between the dataset name and the percentage.
    eigenvalues_prefix = "_eigenvalues_"
    eigenvectors_prefix = "_eigenvectors_"
    meanvector_prefix = "_meanvector_"
    # Directory (relative to the current working directory) holding the
    # saved PCA arrays.
    pca_obj_dir = './pca_obj/'

    def _pca_obj_path(self, dataset_name, prefix, pca_percentages):
        """Return the ``.npy`` path used for one saved PCA array.

        ``np.save`` silently appends ``.npy`` to a name that lacks it while
        ``np.load`` does not, so the extension is spelled out here and the
        same path is used for both writing and reading.
        """
        return (self.pca_obj_dir + dataset_name + prefix
                + str(pca_percentages) + '.npy')

    def create_pca_from_npy(self, dataset_name, labels_npy_path, pca_percentages):
        """
        Generate and save the eigenvalues, eigenvectors and mean vector.

        :param dataset_name: name used as the leading part of the output
            file names.
        :param labels_npy_path: directory containing the normalized labels,
            one ``.npy`` file per sample.
        :param pca_percentages: value in percent; it is divided by 100 and
            handed to ``sklearn.decomposition.PCA`` as ``n_components``.
        :return: None. Three ``.npy`` files are written to ``pca_obj_dir``.
        :raises ValueError: if the directory contains no ``.npy`` file.
        """
        print('PCA calculation started: loading labels')

        # os.listdir returns entries in arbitrary order; sort so the stacked
        # array is the same on every run and platform.
        npy_files = sorted(
            f for f in os.listdir(labels_npy_path) if f.endswith(".npy")
        )
        if not npy_files:
            raise ValueError(
                'no .npy label files found in ' + str(labels_npy_path))

        lbl_arr = np.array([
            load(os.path.join(labels_npy_path, f)) for f in tqdm(npy_files)
        ])

        # The projected data returned by _func_PCA is not needed here.
        _, eigenvalues, eigenvectors = self._func_PCA(lbl_arr, pca_percentages)
        mean_lbl_arr = np.mean(lbl_arr, axis=0)
        # Stored transposed: one eigenvector per column, which is the layout
        # calculate_b_vector expects.
        eigenvectors = eigenvectors.T

        # np.save does not create missing directories.
        os.makedirs(self.pca_obj_dir, exist_ok=True)
        save(self._pca_obj_path(dataset_name, self.eigenvalues_prefix,
                                pca_percentages), eigenvalues)
        save(self._pca_obj_path(dataset_name, self.eigenvectors_prefix,
                                pca_percentages), eigenvectors)
        save(self._pca_obj_path(dataset_name, self.meanvector_prefix,
                                pca_percentages), mean_lbl_arr)

    def load_pca_obj(self, dataset_name, pca_percentages):
        """
        Load the arrays previously written by ``create_pca_from_npy``.

        :param dataset_name: dataset name used when the arrays were saved.
        :param pca_percentages: percentage used when the arrays were saved.
        :return: tuple ``(eigenvalues, eigenvectors, meanvector)``.
        """
        eigenvalues = np.load(self._pca_obj_path(
            dataset_name, self.eigenvalues_prefix, pca_percentages))
        eigenvectors = np.load(self._pca_obj_path(
            dataset_name, self.eigenvectors_prefix, pca_percentages))
        meanvector = np.load(self._pca_obj_path(
            dataset_name, self.meanvector_prefix, pca_percentages))
        return eigenvalues, eigenvectors, meanvector

    def calculate_b_vector(self, predicted_vector, correction, eigenvalues, eigenvectors, meanvector):
        """
        Project a vector onto the eigenvectors, optionally clamping it.

        :param predicted_vector: 1-D vector to project.
        :param correction: when truthy, every coefficient ``b[i]`` is limited
            to the interval ``[-3 * sqrt(eigenvalues[i]), 3 * sqrt(eigenvalues[i])]``.
        :param eigenvalues: sequence with at least one entry per coefficient.
        :param eigenvectors: matrix whose transpose is multiplied with the
            centred vector (one eigenvector per column).
        :param meanvector: vector subtracted from ``predicted_vector``.
        :return: the coefficient vector ``b``.
        """
        b_vector = np.dot(eigenvectors.T, predicted_vector - meanvector)

        if correction:
            # Only the eigenvalues matching the coefficients are used, so a
            # longer eigenvalue array is tolerated.
            used = np.asarray(eigenvalues, dtype=float)[:b_vector.shape[0]]
            limits = 3 * np.sqrt(used)
            b_vector = np.clip(b_vector, -limits, limits)
        return b_vector

    def _func_PCA(self, input_data, pca_postfix):
        """
        Fit a PCA on ``input_data`` and return the projection and the model.

        :param input_data: samples, one per row.
        :param pca_postfix: percentage; ``pca_postfix / 100`` is passed to
            ``PCA`` as ``n_components``. Values of 100 or more keep every
            component, because scikit-learn rejects a float ``n_components``
            that is not strictly below 1.
        :return: tuple ``(transformed_data, explained_variance_, components_)``.
        """
        input_data = np.array(input_data)
        n_components = None if pca_postfix >= 100 else pca_postfix / 100
        pca = PCA(n_components=n_components)
        # fit_transform is the single-call equivalent of fit + transform.
        pca_input_data = pca.fit_transform(input_data)
        return pca_input_data, pca.explained_variance_, pca.components_
|