# Scraped page header (not part of the program):
# RuijiaTan's picture | Upload 14 files | c66f952 | raw | history blame | 5.68 kB
import csv
import os
import xlrd
import numpy as np
from sklearn.impute import SimpleImputer
import element
import clear_data
import pandas as pd
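'''
This file reads the contents of the dataset, normalizes the element ratios,
and writes the parsed tables to the "parser_category" folder.
For each mechanical property it then writes one dataset per null-handling
strategy into the folders "drop_null" and "fill_null". An "interpolate"
folder is also intended, but no code visible in this file writes to it.
'''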
# Mechanical properties processed by this script, one output dataset each.
category = [
    "compressive_strength",
    "elongation",
    "hardness",
    "plasticity",
    "tensile_strength",
    "yield_strength",
]
def read_data(category):
    """Build the element-ratio table for the alloys listed in ``<category>.csv``.

    For every record of the input CSV, the first field is passed to
    ``clear_data.clean_row``. Item ``[1]`` of its result is normalized with
    ``clear_data.normalize_molar_ratios`` and paired with item ``[0]``, whose
    entries are looked up in ``element.elements_list`` to choose the column.
    Elements that do not occur in an alloy keep the ratio 0.

    The table is written to
    ``parser_result/parser_category/Parser_element.csv`` with
    ``element.elements_list`` as the header row.

    Args:
        category: Path of the input CSV file without its ``.csv`` suffix.

    Returns:
        None. The result is only written to disk.

    Raises:
        IndexError: If an input record has no fields (e.g. a blank line).
        ValueError: If a cleaned element name is not in
            ``element.elements_list``.
    """
    # Read the input once inside a context manager so the handle is closed,
    # and size the table from the parsed records rather than from a second
    # pass counting physical lines.
    with open(category + ".csv", newline="") as source:
        alloys = list(csv.reader(source))

    columns = element.elements_list
    ## Start from an all-zero table: one row per alloy, one column per element.
    table = np.zeros((len(alloys), len(columns)), dtype=float)

    for row_idx, alloy in enumerate(alloys):
        # Clean the composition once and reuse both parts of the result.
        cleaned = clear_data.clean_row(str(alloy[0]))
        ratios = clear_data.normalize_molar_ratios(cleaned[1])
        ## Put each ratio in its element's column; if a name repeats,
        ## the last ratio wins.
        for name, ratio in zip(cleaned[0], ratios):
            table[row_idx, columns.index(name)] = float(ratio)

    ## Save the table; newline="" lets the csv module control line endings.
    with open("parser_result/parser_category/" + "Parser_element.csv", "w",
              newline="") as out:
        writer = csv.writer(out)
        writer.writerow(columns)
        writer.writerows(table)
# Column index, in the first worksheet, of each mechanical target.
_TARGET_COLUMNS = {
    "hardness": 4,
    "yield_strength": 5,
    "tensile_strength": 6,
    "elongation": 7,
    "compressive_strength": 8,
    "plasticity": 9,
}


def get_mechnical(path, category):
    """Attach one mechanical target column to the element table and save it.

    The target values come from the first worksheet of the workbook at
    ``path``; the first two cells of the column are skipped and the category
    name is used as the column header instead. The values are appended, row
    by row, to ``parser_result/Parser_element.csv`` and written to
    ``parser_result/parser_category/Parser_<category>.csv``.

    That file is then reloaded with pandas to print the share of missing
    targets and to write two variants:

    * ``parser_result/fill_null/<category>_fill_null.csv`` - nulls set to 0.
    * ``parser_result/drop_null/<category>_drop_null.csv`` - rows containing
      any null removed.

    Args:
        path: Path of the workbook holding the mechanical targets.
        category: Target to extract; one of the keys of ``_TARGET_COLUMNS``.

    Returns:
        None. All results are written to disk or printed.

    Raises:
        ValueError: If ``category`` is not a known mechanical target.
            Previously such a call silently wrote the element table with no
            target column.
        IndexError: If the worksheet column has fewer values than the
            element table has rows.
    """
    try:
        column = _TARGET_COLUMNS[category]
    except KeyError:
        raise ValueError(
            "Unknown mechanical category %r; expected one of %s"
            % (category, sorted(_TARGET_COLUMNS))
        ) from None

    ## For Mechnical Targets.csv
    sheet = xlrd.open_workbook(path).sheets()[0]
    # Only the requested column is read. The leading category name lines up
    # with the header row of the element table.
    targets = [category] + sheet.col_values(column)[2:]

    parsed_csv = "parser_result/parser_category/Parser_" + category + ".csv"

    # Save the mechanical property next to the element ratios.
    # NOTE(review): read_data() writes its table to
    # "parser_result/parser_category/Parser_element.csv", while this reads
    # "parser_result/Parser_element.csv" - confirm which location is intended.
    with open("parser_result/Parser_element.csv", newline="") as src:
        with open(parsed_csv, "w", newline="") as dst:
            writer = csv.writer(dst)
            for index, row in enumerate(csv.reader(src)):
                row.append(targets[index])
                writer.writerow(row)

    data = pd.read_csv(parsed_csv)
    # The target is the last column; report how much of it is missing.
    null_ratio = data.iloc[:, -1].isnull().mean()
    print("Null ratio in " + category + " dataset is:", round(null_ratio, 2))

    # Replace null with 0s.
    data.fillna(0).to_csv(
        'parser_result/fill_null/' + category + '_fill_null.csv', index=False)

    # Delete null.
    data.dropna(axis=0, how='any').to_csv(
        'parser_result/drop_null/' + category + '_drop_null.csv', index=False)
# # Split dataset to knn&rf model.
# data = data.fillna(0)
# df_test = data.drop(index=data.index)
# idx = 0
# idx_exit = int(data.shape[0] * 0.07)
# for index, row in data.iterrows():
# if row.astype(int)[-1] != 0 and idx <= idx_exit:
# df_test = df_test.append(row, ignore_index=True)
# data = data.drop([index])
# idx += 1
# df_test.to_csv('parser_result/RF_test/'+category+'_RF_test.csv', index=False)
#
# # Dealing with rfr_train, split it into knn_train and knn_test.
# df_train = pd.DataFrame(data=data)
# # Calculate the average number X of data(not 0).
# sum_num = 0
# num = 0
# for index, row in df_train.iterrows():
# if row.astype(int)[-1] != 0:
# num += 1
# sum_num += row.astype(int)[-1]
# mean_num = sum_num / num
# # df_0: which need to be imputed by KNN.
# df_0 = data.drop(index=data.index)
# df_pure = data.drop(index=data.index)
# for index, row in df_train.iterrows():
# if row.astype(int)[-1] == 0:
# df_0 = df_0.append(row, ignore_index=True)
# else:
# df_pure = df_pure.append(row, ignore_index=True)
# df_0.to_csv('parser_result/KNN_test/'+category+'_KNN_test.csv', index=False)
# df_pure.to_csv('parser_result/KNN_train/' + category + '_KNN_train.csv', index=False)
if __name__ == "__main__":
    # Parse the alloy compositions first, then emit one dataset per
    # mechanical target listed in `category`.
    read_data("mechanical_composition")
    for target_name in category:
        get_mechnical('mechanical.xls', target_name)