|
import numpy as np |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import statistics |
|
import pandas as pd |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.metrics import classification_report, confusion_matrix, \ |
|
accuracy_score, roc_auc_score, roc_curve, f1_score, recall_score, precision_score |
|
import matplotlib.pyplot as plt |
|
import copy |
|
from sklearn import preprocessing, tree |
|
from sklearn.linear_model import LogisticRegression, LinearRegression |
|
from sklearn.tree import DecisionTreeClassifier |
|
from scipy.spatial import distance |
|
from sklearn.naive_bayes import GaussianNB |
|
import itertools |
|
import os |
|
from sklearn.model_selection import train_test_split |
|
import matplotlib.pyplot as plt |
|
import random |
|
from sklearn.utils import shuffle |
|
from imblearn.under_sampling import NearMiss,TomekLinks |
|
from imblearn.over_sampling import SMOTE |
|
from collections import Counter |
|
from imblearn.combine import SMOTETomek, SMOTEENN |
|
from sklearn.model_selection import StratifiedKFold |
|
from imblearn.pipeline import make_pipeline |
|
|
|
from matplotlib import pyplot |
|
from scipy import interp |
|
from sklearn.metrics import roc_curve,auc |
|
|
|
|
|
from keras.models import Sequential |
|
from keras.layers import Dense, SimpleRNN, LSTM |
|
|
|
|
|
|
|
# Load the per-timestep ComE embedding files: every file in the directory
# becomes one entry of ComE_id_embs (the parsed rows of that file).
path = 'ComE_per_timestep/embs'
# os.listdir() returns names in arbitrary order. Sort them so that index i
# refers to the same file on every run and lines up with the other
# per-timestep inputs, which are paired with this list by position.
# NOTE(review): lexicographic order assumes equal-width / zero-padded
# timestep numbers in the file names -- confirm against the data files.
folder = sorted(os.listdir(path))
ComE_id_embs = []
for file in folder:
    ComE_id_embs.append(
        np.genfromtxt(os.path.join(path, file), dtype=None).tolist())
|
|
|
|
|
|
|
# Load the per-timestep predicted community labels: every file in the
# directory becomes one entry of ComE_lbls.
path = 'ComE_per_timestep/labels_pred'
# os.listdir() returns names in arbitrary order. These label files are later
# matched to the embedding files purely by list index, so the listing must be
# deterministic.
# NOTE(review): lexicographic order assumes equal-width / zero-padded
# timestep numbers in the file names -- confirm against the data files.
folder = sorted(os.listdir(path))
ComE_lbls = []
for file in folder:
    ComE_lbls.append(
        np.genfromtxt(os.path.join(path, file), dtype=None).tolist())
|
|
|
|
|
|
|
# For every timestep keep only the first field of each embedding row,
# which is used as the node id throughout the rest of the script.
node_ids = [
    [record[0] for record in step_rows]
    for step_rows in ComE_id_embs
]
|
|
|
|
|
|
|
# One dict per timestep mapping node id -> community label.
# Labels are looked up by position: the label at index `pos` of ComE_lbls[t]
# is assigned to the node at index `pos` of node_ids[t].
id_clr = [
    {node: ComE_lbls[step_idx][pos] for pos, node in enumerate(step_nodes)}
    for step_idx, step_nodes in enumerate(node_ids)
]
|
|
|
|
|
# Invert each {node id -> label} dict into {label -> [node ids]}.
# A single pass over the nodes replaces the original "scan all nodes once per
# label" construction; members keep the dict's iteration order.
clustered_nodes_init = []
for labels_by_node in id_clr:
    members_by_label = {}
    for node, label in labels_by_node.items():
        members_by_label.setdefault(label, []).append(node)
    clustered_nodes_init.append(members_by_label)

# Per timestep, the member lists ordered by ascending label, so that
# clustered_nodes[t][k] is the k-th smallest label's cluster at timestep t.
clustered_nodes = [
    [members for _, members in sorted(members_by_label.items())]
    for members_by_label in clustered_nodes_init
]
|
|
|
|
|
|
|
|
|
|
|
# Load the per-timestep ComE feature tables and build six row lists per
# timestep, one per feature subset. Each row is [node_id, feature...], and
# each timestep's rows are sorted (node_id is the first sort key).
path = 'ComE_features_per_timestep/'
# os.listdir() returns names in arbitrary order; sort so that timestep i is
# the same file on every run and matches the other per-timestep inputs.
# NOTE(review): lexicographic order assumes equal-width / zero-padded
# timestep numbers in the file names -- confirm against the data files.
folder = sorted(os.listdir(path))

# Column names imposed on every file (the file's own first row is skipped).
ComE_feature_names = [
    'node_id',
    'distin_med_eucl', 'distin_med_cos', 'distin_med_l1',
    'distout_med_eucl', 'distout_med_cos', 'distout_med_l1',
    'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg',
    'distin_cos_max', 'distin_cos_min', 'distin_cos_avg',
    'distin_l1_max', 'distin_l1_min', 'distin_l1_avg',
    'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg',
    'distout_cos_max', 'distout_cos_min', 'distout_cos_avg',
    'distout_l1_max', 'distout_l1_min', 'distout_l1_avg',
    'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl',
    'dist_glob_max_cos', 'dist_glob_min_cos', 'dist_glob_avg_cos',
    'dist_glob_max_l1', 'dist_glob_min_l1', 'dist_glob_avg_l1',
]

ComE_clr_columns = [
    'node_id', 'distin_med_eucl',
    'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg',
]
ComE_out_columns = [
    'node_id', 'distout_med_eucl',
    'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg',
]
# NOTE(review): the "gbl" subset uses 'distout_med_eucl' as its median column
# (there is no dist_glob median column in the file) -- confirm this is
# intended and not a copy-paste from the "out" subset.
ComE_gbl_columns = [
    'node_id', 'distout_med_eucl',
    'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl',
]
ComE_clrout_columns = [
    'node_id', 'distin_med_eucl', 'distout_med_eucl',
    'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg',
    'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg',
]
ComE_clrgbl_columns = [
    'node_id', 'distin_med_eucl',
    'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg',
    'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl',
]
ComE_all_columns = [
    'node_id', 'distin_med_eucl', 'distout_med_eucl',
    'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg',
    'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg',
    'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl',
]

id_ComE_feats_clr = []
id_ComE_feats_out = []
id_ComE_feats_gbl = []
id_ComE_feats_clrout = []
id_ComE_feats_clrgbl = []
id_ComE_feats_all = []
for file in folder:
    df_ComE = pd.read_csv(os.path.join(path, file),
                          names=ComE_feature_names, skiprows=1)
    # sorted() here gives the same ordering as the original in-place
    # list.sort() that ran after all files had been loaded.
    id_ComE_feats_clr.append(
        sorted(df_ComE[ComE_clr_columns].values.tolist()))
    id_ComE_feats_out.append(
        sorted(df_ComE[ComE_out_columns].values.tolist()))
    id_ComE_feats_gbl.append(
        sorted(df_ComE[ComE_gbl_columns].values.tolist()))
    id_ComE_feats_clrout.append(
        sorted(df_ComE[ComE_clrout_columns].values.tolist()))
    id_ComE_feats_clrgbl.append(
        sorted(df_ComE[ComE_clrgbl_columns].values.tolist()))
    id_ComE_feats_all.append(
        sorted(df_ComE[ComE_all_columns].values.tolist()))
|
|
|
|
|
|
|
# Load the per-timestep "classic" centrality tables and build four row lists
# per timestep, one per feature subset. Each row is [node_id, feature...],
# and each timestep's rows are sorted (node_id is the first sort key).
path = 'classic_features_per_timestep/classic_features'
# os.listdir() returns names in arbitrary order; sort so that timestep i is
# the same file on every run and matches the other per-timestep inputs.
# NOTE(review): lexicographic order assumes equal-width / zero-padded
# timestep numbers in the file names -- confirm against the data files.
folder = sorted(os.listdir(path))

# Column names imposed on every file (the file's own first row is skipped).
classic_feature_names = [
    'node_id',
    'degree', 'betweenness', 'closeness', 'eigenvector',
    'degree_ntwk', 'betweenness_ntwk', 'closeness_ntwk', 'eigenvector_ntwk',
]
classic_clr_columns = [
    'node_id', 'degree', 'betweenness', 'closeness', 'eigenvector',
]
classic_gbl_columns = [
    'node_id',
    'degree_ntwk', 'betweenness_ntwk', 'closeness_ntwk', 'eigenvector_ntwk',
]
classic_nodeg_columns = [
    'node_id',
    'betweenness', 'closeness', 'eigenvector',
    'betweenness_ntwk', 'closeness_ntwk', 'eigenvector_ntwk',
]

id_classic_clr = []
id_classic_gbl = []
id_classic_all = []
id_classic_nodeg = []
for file in folder:
    df_classic = pd.read_csv(os.path.join(path, file),
                             names=classic_feature_names, skiprows=1)
    id_classic_all.append(sorted(df_classic.values.tolist()))
    id_classic_clr.append(
        sorted(df_classic[classic_clr_columns].values.tolist()))
    id_classic_gbl.append(
        sorted(df_classic[classic_gbl_columns].values.tolist()))
    # The "no degree" subset is selected by column name from the table that
    # was already parsed. The original re-read the same file with only seven
    # names; that does not pick columns by name, so the resulting rows did
    # not start with the real node id and the degree columns were not the
    # ones left out.
    id_classic_nodeg.append(
        sorted(df_classic[classic_nodeg_columns].values.tolist()))
|
|
|
# Combined feature set: each ComE "clrout" row followed by the classic
# features (everything after the id) of the row at the same timestep and the
# same position in id_classic_all. Rows are paired by position, not by id.
id_combo_ComE_clrout_classic_all = [
    [
        come_row + id_classic_all[step_idx][row_idx][1:]
        for row_idx, come_row in enumerate(step_rows)
    ]
    for step_idx, step_rows in enumerate(id_ComE_feats_clrout)
]
|
|
|
|
|
|
|
|
|
|
|
# For every pair of consecutive timesteps, match each cluster of the earlier
# timestep to the cluster of the later timestep it shares the most nodes
# with. matching[t] holds one [current_index, next_index, n_common] entry per
# cluster of timestep t.
matching = []
for step_idx in range(len(clustered_nodes) - 1):
    # Build the next-step sets once per step instead of once per cluster pair.
    next_sets = [set(cluster) for cluster in clustered_nodes[step_idx + 1]]
    matching_two = []
    for cur_idx, cur_cluster in enumerate(clustered_nodes[step_idx]):
        cur_set = set(cur_cluster)
        overlaps = [
            [cur_idx, nxt_idx, len(cur_set & nxt_set)]
            for nxt_idx, nxt_set in enumerate(next_sets)
        ]
        # On ties the LAST candidate with the maximal overlap wins, as in the
        # original loop; max() returns the first maximum, hence reversed().
        best = max(reversed(overlaps), key=lambda item: item[-1])
        matching_two.append(best)
    matching.append(matching_two)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def twoChain_scd(features):
    """Build the shortest chains: one labelled feature row per node.

    Uses the module-level ``matching``, ``clustered_nodes`` and ``node_ids``.
    For every step of ``matching`` except the last, and for every node of
    every cluster of that step, a row ``[node_id, features..., label]`` is
    produced, where the label is

    * 0 if the node is in the matched cluster of the next timestep,
    * 1 if it is present in the next timestep but not in that cluster,
    * 2 if it is absent from the next timestep.

    Args:
        features: per-timestep lists of ``[node_id, feature...]`` rows.

    Returns:
        A list with one entry per processed timestep, each a list of rows.
    """
    two_chain_scd = []
    for ind, step in enumerate(matching[:-1]):
        # Index this timestep's feature rows by node id once, instead of
        # scanning the whole list for every node. All rows sharing an id are
        # kept, in order, exactly as the original scan would append them.
        feats_by_id = {}
        for idfeatures in features[ind]:
            feats_by_id.setdefault(idfeatures[0], []).append(idfeatures[1:])
        next_step_ids = set(node_ids[ind + 1])

        per_step = []
        for clr in step:
            matched_next = set(clustered_nodes[ind + 1][clr[1]])
            for nodeid in clustered_nodes[ind][clr[0]]:
                tmp = [nodeid]
                for extra in feats_by_id.get(nodeid, ()):
                    tmp.extend(extra)
                if nodeid in matched_next:
                    tmp.append(0)
                elif nodeid in next_step_ids:
                    tmp.append(1)
                else:
                    tmp.append(2)
                per_step.append(tmp)
        two_chain_scd.append(per_step)
    return two_chain_scd
|
|
|
def chains_scd(prev_chain_scd, features, a):
    """Extend every chain of ``prev_chain_scd`` by one more timestep.

    Uses the module-level ``matching``, ``clustered_nodes`` and ``node_ids``.
    The last step of ``prev_chain_scd`` is discarded and the rest is
    deep-copied, so the input is never modified. For a row at step ``ind``
    the timestep used is ``ind + 2 + a``:

    * if the row's current label is 0 or 1, the node's features at that
      timestep are appended, followed by a new label computed from the first
      cluster of that timestep containing the node (0: also in the matched
      cluster of the following timestep, 1: present in the following
      timestep, 2: absent from it);
    * otherwise (label 2) the row is padded with -1 values in front of its
      label so that it grows by the same number of fields.

    NOTE(review): twoChain_scd labels the ind -> ind+1 transition while this
    function continues from timestep ind+2+a -- confirm the one-step gap is
    intended.

    Args:
        prev_chain_scd: per-step row lists produced by the previous length.
        features: per-timestep lists of ``[node_id, feature...]`` rows.
        a: extra timestep offset (0 when extending the two-step chains).

    Returns:
        The extended per-step row lists.
    """
    curr_chain_scd = copy.deepcopy(prev_chain_scd[:-1])
    for ind, step in enumerate(curr_chain_scd):
        cur = ind + 2 + a
        nxt = cur + 1
        # Feature rows of timestep `cur` indexed by node id. Built on first
        # use so that steps without any surviving row never touch
        # features[cur], exactly like the original.
        feats_by_id = None
        for row in step:
            if row[-1] == 0 or row[-1] == 1:
                if feats_by_id is None:
                    feats_by_id = {}
                    for idfeatures in features[cur]:
                        feats_by_id.setdefault(
                            idfeatures[0], []).append(idfeatures[1:])
                node = row[0]
                for extra in feats_by_id.get(node, ()):
                    row.extend(extra)
                for cl in matching[cur]:
                    if node in clustered_nodes[cur][cl[0]]:
                        if node in clustered_nodes[nxt][cl[1]]:
                            row.append(0)
                        elif node in node_ids[nxt]:
                            row.append(1)
                        else:
                            row.append(2)
                        # Only the first cluster containing the node counts.
                        break
            else:
                # Dropped node: insert one -1 per feature plus one for the
                # missing intermediate label, keeping the label last.
                row[-1:-1] = [-1] * (len(features[0][0][1:]) + 1)
    return curr_chain_scd
|
|
|
|
|
|
|
|
|
def chains_sl(chainsSCD):
    """Return a deep copy of the rows in which every final label 2 becomes 1.

    The input rows are left untouched; all other labels are kept as they are.
    """
    relabelled = copy.deepcopy(chainsSCD)
    for record in relabelled:
        label = record[-1]
        if label == 2:
            record[-1] = 1
    return relabelled
|
|
|
|
|
|
|
|
|
def chains_sc(chainsSCD):
    """Return the rows whose final label is not 2.

    The returned list is new, but it holds the very same row objects as the
    input (no copies are made).
    """
    return [record for record in chainsSCD if record[-1] != 2]
|
|
|
|
|
|
|
def per_chain_all_chains_scd(feats):
    """Build the chains of all eight lengths for one feature set.

    Starts from ``twoChain_scd(feats)`` and extends it seven times with
    ``chains_scd`` (offsets 0 through 6). Each result is then flattened from
    per-step lists into one flat list of rows.

    Returns:
        A 9-tuple: the eight flat row lists, from the two-step chains up to
        the nine-step chains, followed by one list that concatenates all of
        them in that order.
    """
    nested = [twoChain_scd(feats)]
    for offset in range(7):
        # Every length is derived from the previous (still nested) one.
        nested.append(chains_scd(nested[-1], feats, offset))

    flattened = [
        [row for step_rows in chain for row in step_rows]
        for chain in nested
    ]
    pooled = [row for chain_rows in flattened for row in chain_rows]
    return tuple(flattened) + (pooled,)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Chain datasets for the id_ComE_feats_clr feature set: the eight per-length
# row lists (two_chain .. nine_chain) followed by the pooled list of all rows.
_ComE_clr_scd_sets = per_chain_all_chains_scd(id_ComE_feats_clr)
(two_chain_ComE_clr_scd, three_chain_ComE_clr_scd, four_chain_ComE_clr_scd,
 five_chain_ComE_clr_scd, six_chain_ComE_clr_scd, seven_chain_ComE_clr_scd,
 eight_chain_ComE_clr_scd, nine_chain_ComE_clr_scd,
 chains_ComE_clr_scd) = _ComE_clr_scd_sets

# "_sl" variants: chains_sl returns a copy in which label 2 is rewritten to 1.
(two_chain_ComE_clr_sl, three_chain_ComE_clr_sl, four_chain_ComE_clr_sl,
 five_chain_ComE_clr_sl, six_chain_ComE_clr_sl, seven_chain_ComE_clr_sl,
 eight_chain_ComE_clr_sl, nine_chain_ComE_clr_sl,
 chains_ComE_clr_sl) = [chains_sl(rows) for rows in _ComE_clr_scd_sets]

# "_sc" variants: chains_sc keeps only the rows whose label is not 2.
(two_chain_ComE_clr_sc, three_chain_ComE_clr_sc, four_chain_ComE_clr_sc,
 five_chain_ComE_clr_sc, six_chain_ComE_clr_sc, seven_chain_ComE_clr_sc,
 eight_chain_ComE_clr_sc, nine_chain_ComE_clr_sc,
 chains_ComE_clr_sc) = [chains_sc(rows) for rows in _ComE_clr_scd_sets]
|
|
|
|
|
# Chain datasets for the id_ComE_feats_out feature set: the eight per-length
# row lists (two_chain .. nine_chain) followed by the pooled list of all rows.
_ComE_out_scd_sets = per_chain_all_chains_scd(id_ComE_feats_out)
(two_chain_ComE_out_scd, three_chain_ComE_out_scd, four_chain_ComE_out_scd,
 five_chain_ComE_out_scd, six_chain_ComE_out_scd, seven_chain_ComE_out_scd,
 eight_chain_ComE_out_scd, nine_chain_ComE_out_scd,
 chains_ComE_out_scd) = _ComE_out_scd_sets

# "_sl" variants: chains_sl returns a copy in which label 2 is rewritten to 1.
(two_chain_ComE_out_sl, three_chain_ComE_out_sl, four_chain_ComE_out_sl,
 five_chain_ComE_out_sl, six_chain_ComE_out_sl, seven_chain_ComE_out_sl,
 eight_chain_ComE_out_sl, nine_chain_ComE_out_sl,
 chains_ComE_out_sl) = [chains_sl(rows) for rows in _ComE_out_scd_sets]

# "_sc" variants: chains_sc keeps only the rows whose label is not 2.
(two_chain_ComE_out_sc, three_chain_ComE_out_sc, four_chain_ComE_out_sc,
 five_chain_ComE_out_sc, six_chain_ComE_out_sc, seven_chain_ComE_out_sc,
 eight_chain_ComE_out_sc, nine_chain_ComE_out_sc,
 chains_ComE_out_sc) = [chains_sc(rows) for rows in _ComE_out_scd_sets]
|
|
|
|
|
# Chain datasets for the id_ComE_feats_clrout feature set: the eight
# per-length row lists (two_chain .. nine_chain) followed by the pooled list
# of all rows.
_ComE_clrout_scd_sets = per_chain_all_chains_scd(id_ComE_feats_clrout)
(two_chain_ComE_clrout_scd, three_chain_ComE_clrout_scd,
 four_chain_ComE_clrout_scd, five_chain_ComE_clrout_scd,
 six_chain_ComE_clrout_scd, seven_chain_ComE_clrout_scd,
 eight_chain_ComE_clrout_scd, nine_chain_ComE_clrout_scd,
 chains_ComE_clrout_scd) = _ComE_clrout_scd_sets

# "_sl" variants: chains_sl returns a copy in which label 2 is rewritten to 1.
(two_chain_ComE_clrout_sl, three_chain_ComE_clrout_sl,
 four_chain_ComE_clrout_sl, five_chain_ComE_clrout_sl,
 six_chain_ComE_clrout_sl, seven_chain_ComE_clrout_sl,
 eight_chain_ComE_clrout_sl, nine_chain_ComE_clrout_sl,
 chains_ComE_clrout_sl) = [chains_sl(rows) for rows in _ComE_clrout_scd_sets]

# "_sc" variants: chains_sc keeps only the rows whose label is not 2.
(two_chain_ComE_clrout_sc, three_chain_ComE_clrout_sc,
 four_chain_ComE_clrout_sc, five_chain_ComE_clrout_sc,
 six_chain_ComE_clrout_sc, seven_chain_ComE_clrout_sc,
 eight_chain_ComE_clrout_sc, nine_chain_ComE_clrout_sc,
 chains_ComE_clrout_sc) = [chains_sc(rows) for rows in _ComE_clrout_scd_sets]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Chain datasets for the id_classic_clr feature set: the eight per-length
# row lists (two_chain .. nine_chain) followed by the pooled list of all rows.
_classic_clr_scd_sets = per_chain_all_chains_scd(id_classic_clr)
(two_chain_classic_clr_scd, three_chain_classic_clr_scd,
 four_chain_classic_clr_scd, five_chain_classic_clr_scd,
 six_chain_classic_clr_scd, seven_chain_classic_clr_scd,
 eight_chain_classic_clr_scd, nine_chain_classic_clr_scd,
 chains_classic_clr_scd) = _classic_clr_scd_sets

# "_sl" variants: chains_sl returns a copy in which label 2 is rewritten to 1.
(two_chain_classic_clr_sl, three_chain_classic_clr_sl,
 four_chain_classic_clr_sl, five_chain_classic_clr_sl,
 six_chain_classic_clr_sl, seven_chain_classic_clr_sl,
 eight_chain_classic_clr_sl, nine_chain_classic_clr_sl,
 chains_classic_clr_sl) = [chains_sl(rows) for rows in _classic_clr_scd_sets]

# "_sc" variants: chains_sc keeps only the rows whose label is not 2.
(two_chain_classic_clr_sc, three_chain_classic_clr_sc,
 four_chain_classic_clr_sc, five_chain_classic_clr_sc,
 six_chain_classic_clr_sc, seven_chain_classic_clr_sc,
 eight_chain_classic_clr_sc, nine_chain_classic_clr_sc,
 chains_classic_clr_sc) = [chains_sc(rows) for rows in _classic_clr_scd_sets]
|
|
|
|
|
|
|
# Chain datasets for the id_classic_gbl feature set: the eight per-length
# row lists (two_chain .. nine_chain) followed by the pooled list of all rows.
_classic_gbl_scd_sets = per_chain_all_chains_scd(id_classic_gbl)
(two_chain_classic_gbl_scd, three_chain_classic_gbl_scd,
 four_chain_classic_gbl_scd, five_chain_classic_gbl_scd,
 six_chain_classic_gbl_scd, seven_chain_classic_gbl_scd,
 eight_chain_classic_gbl_scd, nine_chain_classic_gbl_scd,
 chains_classic_gbl_scd) = _classic_gbl_scd_sets

# "_sl" variants: chains_sl returns a copy in which label 2 is rewritten to 1.
(two_chain_classic_gbl_sl, three_chain_classic_gbl_sl,
 four_chain_classic_gbl_sl, five_chain_classic_gbl_sl,
 six_chain_classic_gbl_sl, seven_chain_classic_gbl_sl,
 eight_chain_classic_gbl_sl, nine_chain_classic_gbl_sl,
 chains_classic_gbl_sl) = [chains_sl(rows) for rows in _classic_gbl_scd_sets]

# "_sc" variants: chains_sc keeps only the rows whose label is not 2.
(two_chain_classic_gbl_sc, three_chain_classic_gbl_sc,
 four_chain_classic_gbl_sc, five_chain_classic_gbl_sc,
 six_chain_classic_gbl_sc, seven_chain_classic_gbl_sc,
 eight_chain_classic_gbl_sc, nine_chain_classic_gbl_sc,
 chains_classic_gbl_sc) = [chains_sc(rows) for rows in _classic_gbl_scd_sets]
|
|
|
|
|
|
|
# Chain datasets for the id_classic_all feature set: the eight per-length
# row lists (two_chain .. nine_chain) followed by the pooled list of all rows.
_classic_all_scd_sets = per_chain_all_chains_scd(id_classic_all)
(two_chain_classic_all_scd, three_chain_classic_all_scd,
 four_chain_classic_all_scd, five_chain_classic_all_scd,
 six_chain_classic_all_scd, seven_chain_classic_all_scd,
 eight_chain_classic_all_scd, nine_chain_classic_all_scd,
 chains_classic_all_scd) = _classic_all_scd_sets

# "_sl" variants: chains_sl returns a copy in which label 2 is rewritten to 1.
(two_chain_classic_all_sl, three_chain_classic_all_sl,
 four_chain_classic_all_sl, five_chain_classic_all_sl,
 six_chain_classic_all_sl, seven_chain_classic_all_sl,
 eight_chain_classic_all_sl, nine_chain_classic_all_sl,
 chains_classic_all_sl) = [chains_sl(rows) for rows in _classic_all_scd_sets]

# "_sc" variants: chains_sc keeps only the rows whose label is not 2.
(two_chain_classic_all_sc, three_chain_classic_all_sc,
 four_chain_classic_all_sc, five_chain_classic_all_sc,
 six_chain_classic_all_sc, seven_chain_classic_all_sc,
 eight_chain_classic_all_sc, nine_chain_classic_all_sc,
 chains_classic_all_sc) = [chains_sc(rows) for rows in _classic_all_scd_sets]
|
|
|
|
|
|
|
def create_RNN(hidden_units, dense_units, input_shape, activation):
    """Build, compile and print a two-layer recurrent classifier.

    The network is an LSTM layer followed by a Dense output layer, compiled
    with binary cross-entropy loss, the Adam optimizer and accuracy as the
    reported metric. The model summary is printed before returning.

    Args:
        hidden_units: number of units of the LSTM layer.
        dense_units: number of units of the Dense output layer.
        input_shape: shape of one input sample, passed to the LSTM layer.
        activation: activation of the Dense output layer.

    Returns:
        The compiled Sequential model.
    """
    recurrent_layer = LSTM(hidden_units, input_shape=input_shape)
    output_layer = Dense(units=dense_units, activation=activation)

    model = Sequential()
    for layer in (recurrent_layer, output_layer):
        model.add(layer)

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model
|
|
|
|
|
def Classification(chain):
    """Cross-validate the recurrent classifier on one chain dataset.

    Every row of ``chain`` is ``[node_id, feature..., label]``. The rows are
    shuffled, the id is dropped, short feature vectors are right-padded with
    -99, and a fresh model from ``create_RNN`` is trained and evaluated on
    each of 5 stratified folds. Per-fold accuracy and the final mean and
    standard deviation are printed.

    Changes from the original: the stray ``exit()`` after the first fold's
    evaluation is removed, because it terminated the process before any score
    was recorded or summarised. The sequence length fed to the network is now
    taken from the data instead of being hard-coded to 4.

    NOTE(review): the model is built with 2 sigmoid output units and
    binary cross-entropy while the targets are a 1-D label vector -- confirm
    that this output/target shape combination is what is intended.

    Args:
        chain: list of rows as described above.

    Returns:
        The list of per-fold accuracies, in percent.
    """
    chain = shuffle(np.array(chain))
    print("chain2 = ", chain.shape)

    rows = chain.tolist()
    X = [i[1:-1] for i in rows]
    Y = [i[-1] for i in rows]

    # Right-pad every feature vector to the longest one.
    longest = len(max(X, key=len))
    print(longest)
    for row in X:
        row.extend([-99] * (longest - len(row)))

    X = np.array(X)
    Y = np.array(Y)
    print('Y_dataset:', Counter(Y))

    # Each feature is presented to the LSTM as one timestep with one value.
    timesteps = X.shape[1]

    skf = StratifiedKFold(n_splits=5)
    cvscores = []
    for train_index, test_index in skf.split(X, Y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        y_train_cnt = pd.DataFrame([Counter(y_train)]).transpose()
        print(y_train_cnt)
        y_train_cnt.sort_index(inplace=True)
        print('y_train:', y_train_cnt)

        X_train_3d = X_train.reshape((X_train.shape[0], timesteps, 1))
        X_test_3d = X_test.reshape((X_test.shape[0], timesteps, 1))

        rnn_model = create_RNN(100, 2, input_shape=(timesteps, 1),
                               activation='sigmoid')
        rnn_model.fit(X_train_3d, y_train, epochs=50, batch_size=5, verbose=2)

        scores = rnn_model.evaluate(X_test_3d, y_test, verbose=0)
        print("score", scores)
        print("%s: %.2f%%" % (rnn_model.metrics_names[1], scores[1]*100))
        cvscores.append(scores[1] * 100)

    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
    return cvscores
|
|
|
|
|
|
|
|
|
|
|
|
|
# Append the name of the experiment about to run to both result logs, then
# run the classifier on the two-step "classic clr" chains with the _sl labels.
for log_name in ('results_details.csv', 'results.csv'):
    with open(log_name, 'a') as fd:
        fd.write('two_chain_classic_clr_sl'+'\n')

Classification(two_chain_classic_clr_sl)
|
|
|
|