|
import csv |
|
import os |
|
import xlrd |
|
import numpy as np |
|
from sklearn.impute import SimpleImputer |
|
|
|
import element |
|
import clear_data |
|
import pandas as pd |
|
|
|
'''
The purpose of this file is to read the contents of the dataset and
normalize the element compositions; the parsed results are written to the
"parser_category" folder.

The data is then output with three different null-value treatments, in the
three folders "drop_null", "fill_null" and "interpolate".
'''
|
|
|
# Mechanical properties for which a separate dataset is produced.
category = [
    "compressive_strength",
    "elongation",
    "hardness",
    "plasticity",
    "tensile_strength",
    "yield_strength",
]
|
|
|
def read_data(category):
    """Convert the alloy formulas in ``<category>.csv`` into an element matrix.

    The first cell of every CSV row is passed (as a string) to
    ``clear_data.clean_row``. Item ``[0]`` of its result is used as the
    element names and item ``[1]`` is fed to
    ``clear_data.normalize_molar_ratios`` to obtain one ratio per name.
    Each ratio is stored in the matrix column whose position matches the
    element's position in ``element.elements_list``; all other cells stay 0.

    The matrix is written to
    ``parser_result/parser_category/Parser_element.csv`` with
    ``element.elements_list`` as the header row.

    Args:
        category: Base name (without the ``.csv`` extension) of the input
            file, resolved relative to the current working directory.

    Returns:
        None. The result is only written to disk.

    Raises:
        IndexError: If a CSV row has no cells.
        ValueError: If a parsed element name is not in
            ``element.elements_list``.
    """
    elements = element.elements_list

    # Read all rows inside a context manager: the file handle is closed
    # deterministically, and the matrix is sized from the parsed CSV rows
    # rather than from a second pass counting physical lines.
    with open(category + ".csv", newline="") as src:
        alloys = list(csv.reader(src))

    result = np.zeros((len(alloys), len(elements)), dtype=float)
    for row_idx, alloy in enumerate(alloys):
        # Parse the formula once and reuse both parts of the result.
        parsed = clear_data.clean_row(str(alloy[0]))
        names = parsed[0]
        ratios = clear_data.normalize_molar_ratios(parsed[1])
        # If a name repeats, the later ratio overwrites the earlier one,
        # matching the dict(zip(...)) semantics this replaces.
        for name, ratio in zip(names, ratios):
            result[row_idx, elements.index(name)] = float(ratio)

    out_path = "parser_result/parser_category/Parser_element.csv"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # newline="" stops the csv module's "\r\n" terminator from being
    # translated again on Windows (which would yield blank rows).
    with open(out_path, 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(elements)
        writer.writerows(result)
|
|
|
def get_mechnical(path, category):
    """Append one mechanical-property column to the element matrix and export it.

    The property values are taken from the first sheet of the workbook at
    ``path``: a fixed sheet column per property (see ``property_columns``),
    skipping the first two sheet rows, with ``category`` prepended as the
    header cell. Row *i* of the element CSV receives value *i* of that list.

    Files written:
        * ``parser_result/parser_category/Parser_<category>.csv`` - element
          matrix plus the property column.
        * ``parser_result/fill_null/<category>_fill_null.csv`` - same data
          with every missing value replaced by 0.
        * ``parser_result/drop_null/<category>_drop_null.csv`` - same data
          with every row containing a missing value removed.

    The share of missing values in the last column is printed to stdout.

    Args:
        path: Path of the workbook opened with ``xlrd.open_workbook``.
        category: Property name. For a name that is not a key of
            ``property_columns`` no column is appended and the element rows
            are copied unchanged (kept for backward compatibility).

    Returns:
        None.

    Raises:
        IndexError: If the element CSV has more rows than the sheet column
            provides values (including the header).
    """
    # Sheet column index holding each supported property.
    property_columns = {
        "hardness": 4,
        "yield_strength": 5,
        "tensile_strength": 6,
        "elongation": 7,
        "compressive_strength": 8,
        "plasticity": 9,
    }

    m_sheet = xlrd.open_workbook(path).sheets()[0]

    # Only the requested column is read; the header cell goes first so the
    # list lines up with the header row of the element CSV.
    values = None
    column_index = property_columns.get(category)
    if column_index is not None:
        values = [category] + m_sheet.col_values(column_index)[2:]

    # read_data() writes the element matrix into parser_category/, so prefer
    # that file; fall back to the legacy location this function used to read.
    element_csv = "parser_result/parser_category/Parser_element.csv"
    if not os.path.exists(element_csv):
        element_csv = "parser_result/Parser_element.csv"

    category_csv = "parser_result/parser_category/Parser_" + category + ".csv"
    fill_csv = 'parser_result/fill_null/' + category + '_fill_null.csv'
    drop_csv = 'parser_result/drop_null/' + category + '_drop_null.csv'
    for target in (category_csv, fill_csv, drop_csv):
        os.makedirs(os.path.dirname(target), exist_ok=True)

    # newline="" is what the csv module expects for both reading and writing;
    # without it Windows output gets an extra blank line after every row.
    with open(element_csv, newline="") as src, \
            open(category_csv, 'w', newline="") as dst:
        writer = csv.writer(dst)
        for index, row in enumerate(csv.reader(src)):
            if values is not None:
                row.append(values[index])
            writer.writerow(row)

    # Re-read only after the writer's file has been closed (and flushed).
    data = pd.read_csv(category_csv)

    null_ratio = data.iloc[:, -1].isnull().mean()
    print(f"Null ratio in {category} dataset is: {round(null_ratio, 2)}")

    data.fillna(0).to_csv(fill_csv, index=False)
    data.dropna(axis=0, how='any').to_csv(drop_csv, index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Build the element matrix first, then derive one dataset per property.
    read_data("mechanical_composition")
    for category_name in category:
        get_mechnical('mechanical.xls', category_name)
|
|
|
|
|
|
|
|
|
|
|
|