Upload 14 files
Browse filesUpload Parser files.
- Parser/.DS_Store +0 -0
- Parser/Icon/r +0 -0
- Parser/Readme.md +9 -0
- Parser/clear_data.py +79 -0
- Parser/element.py +5 -0
- Parser/main.py +146 -0
- Parser/parser_result/.DS_Store +0 -0
- Parser/parser_result/Icon/r +0 -0
- Parser/parser_result/drop_null/.DS_Store +0 -0
- Parser/parser_result/drop_null/Icon/r +0 -0
- Parser/parser_result/fill_null/.DS_Store +0 -0
- Parser/parser_result/fill_null/Icon/r +0 -0
- Parser/parser_result/parser_category/.DS_Store +0 -0
- Parser/parser_result/parser_category/Icon/r +0 -0
Parser/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Parser/Icon/r
ADDED
File without changes
|
Parser/Readme.md
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
The Parser works as follows:
|
2 |
+
|
3 |
+
· Firstly, the Parser reads the chemical formula of an alloy (which is the first column in the mechanical property dataset).
|
4 |
+
|
5 |
+
· Secondly, it cleans redundant symbols in the chemical formula (such as spaces and brackets).
|
6 |
+
|
7 |
+
· Thereafter, it normalises the proportions of the different elements in the alloy composition. It then provides the ratio for each corresponding element and outputs a CSV file. The CSV file contains the 27 elements that appear in the dataset, along with the proportion of each element for each MPEA.
|
8 |
+
|
9 |
+
· The final output is utilised in machine learning processes.
|
Parser/clear_data.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
'''
|
4 |
+
Use normalize_molar_ratios() to turn the original ratios to required format.
|
5 |
+
For example: [0.5,0.5,0.5,0.5] -> [0.25,0.25,0.25,0.25]
|
6 |
+
'''
|
7 |
+
def normalize_molar_ratios(ratios):
    """Scale molar ratios so that they sum to 1.

    For example: [0.5, 0.5, 0.5, 0.5] -> [0.25, 0.25, 0.25, 0.25]

    Args:
        ratios: Iterable of numeric molar ratios. It is materialised once, so
            one-shot iterables such as generators are handled correctly.

    Returns:
        A new list of floats: each input ratio divided by the sum of all
        ratios. An empty input yields an empty list.

    Raises:
        ZeroDivisionError: If the input is non-empty and its (Python-number)
            ratios sum to zero.
    """
    # Snapshot the input first: it is traversed twice (sum, then division),
    # and an iterator exhausted by sum() would otherwise produce [].
    values = list(ratios)
    ele_sum = sum(values)
    return [float(ele / ele_sum) for ele in values]
|
14 |
+
|
15 |
+
|
16 |
+
'''
|
17 |
+
Turn the original alloy string to every element and corresponding content(without normalization).
|
18 |
+
For example: 'Ag2Cu3C' -> result_ele = ['Ag','Cu','C'], result__num = [2.0, 3.0, 1.0]
|
19 |
+
'''
|
20 |
+
def clean_row(row: str):
    """Split an alloy formula into element symbols and their raw ratios.

    For example: 'Ag2Cu3C' -> (['Ag', 'Cu', 'C'], [2.0, 3.0, 1.0])

    Args:
        row: The alloy formula, e.g. 'AgAl0.5'. An empty string is accepted.

    Returns:
        A tuple ``(result_ele, result__num)``:
        - result_ele: list of element symbols in order of appearance. A
          symbol is an upper-case letter optionally followed by one
          lower-case letter; all non-letter characters are ignored.
        - result__num: list of floats, the (un-normalised) ratios found in
          the formula. An element written without a number gets 1.0.

    Raises:
        ValueError: If a numeric token cannot be converted by ``float``.
    """
    # --- Element symbols: look only at the letters of the formula. ---
    letters = [ch for ch in row if ch.isalpha()]
    result_ele = []
    for current, following in zip(letters, letters[1:]):
        if current.isupper() and following.islower():
            # Two-letter symbol such as 'Ag'.
            result_ele.append(current + following)
        if current.isupper() and following.isupper():
            # One-letter symbol directly followed by the next symbol.
            result_ele.append(current)
    # A trailing upper-case letter is a one-letter symbol, e.g. the 'C' in
    # 'Ag2Cu3C'.
    if letters and letters[-1].isupper():
        result_ele.append(letters[-1])

    # --- Ratios: make the implicit '1' explicit, then extract numbers. ---
    # In terms of "AgAl0.5" and "CAg0.5": when a letter is immediately
    # followed by the upper-case start of the next symbol, insert '1'.
    # NOTE(review): a separator (space, bracket, ...) between two symbols is
    # not bridged, so 'Al Cu' yields a single ratio -- confirm whether the
    # input can contain such formulas.
    padded = []
    for current, following in zip(row, row[1:]):
        padded.append(current)
        if following.isupper() and (current.islower() or current.isupper()):
            padded.append("1")
    if row:  # guard: indexing row[-1] on an empty formula would raise
        padded.append(row[-1])
        if row[-1].isalpha():
            # The formula ends with a symbol that has no explicit ratio.
            padded.append("1")

    # The exponent is matched as one optional group so that '1e-3' is
    # captured whole; a dangling 'e' or '-' is never captured.
    result_num = re.findall(r'-?\d+\.?\d*(?:e-?\d+)?', "".join(padded))
    result__num = [float(token) for token in result_num]

    return result_ele, result__num
|
Parser/element.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# The 27 element symbols used as the output columns, in column order.
elements_list = [
    "Al", "B", "C", "Co", "Cr", "Cu", "Fe", "Ga", "Ge",
    "Hf", "Li", "Mg", "Mn", "Mo", "N", "Nb", "Ni", "Sc",
    "Si", "Sn", "Ta", "Ti", "V", "W", "Y", "Zn", "Zr",
]
|
5 |
+
|
Parser/main.py
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
import os
|
3 |
+
import xlrd
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.impute import SimpleImputer
|
6 |
+
|
7 |
+
import element
|
8 |
+
import clear_data
|
9 |
+
import pandas as pd
|
10 |
+
|
11 |
+
'''
|
12 |
+
The purpose of this file is to read the contents of the dataset,
|
13 |
+
normalize the element ratios, and write them to the "parser_category" folder.
|
14 |
+
|
15 |
+
Then write two null-handling variants of each target dataset to the folders "drop_null" and "fill_null" (an "interpolate" variant is mentioned but is not produced by this script).
|
16 |
+
'''
|
17 |
+
|
18 |
+
# Names of the mechanical-property targets handled by this script.
category = [
    "compressive_strength",
    "elongation",
    "hardness",
    "plasticity",
    "tensile_strength",
    "yield_strength",
]
|
19 |
+
|
20 |
+
def read_data(category):
    """Convert the alloy formulas of ``<category>.csv`` into an element table.

    The first column of every record is parsed with ``clear_data.clean_row``,
    the ratios are normalised with ``clear_data.normalize_molar_ratios``, and
    the result is written to
    ``parser_result/parser_category/Parser_element.csv``: one header row with
    ``element.elements_list`` followed by one row of ratios per alloy.

    Args:
        category: Base name (without ``.csv``) of the input file.

    Raises:
        ValueError: If a formula contains a symbol that is not in
            ``element.elements_list``.
    """
    # Read the file once inside a context manager so the handle is closed.
    # Completely empty lines carry no formula and are skipped.
    with open(category + ".csv", newline="") as source:
        alloys = [record for record in csv.reader(source) if record]

    ## Build a new array whose elements are all 0.
    result = np.zeros((len(alloys), len(element.elements_list)), dtype=float)

    for count, alloy in enumerate(alloys):
        # Parse the formula once and reuse both parts of the result.
        symbols, raw_ratios = clear_data.clean_row(str(alloy[0]))
        alloy_ratio = clear_data.normalize_molar_ratios(raw_ratios)

        ## Add the corresponding ratios at the proper location.
        # If a symbol occurs twice, the later ratio overwrites the earlier one.
        for key, ratio in zip(symbols, alloy_ratio):
            result[count, element.elements_list.index(key)] = float(ratio)

    ## Save the result(array) as 'Parser_element.csv'.
    # newline="" is what the csv module requires for files it writes to.
    with open("parser_result/parser_category/" + "Parser_element.csv", 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(element.elements_list)
        writer.writerows(result)
|
47 |
+
|
48 |
+
def get_mechnical(path, category):
    """Attach one mechanical-property column to the element table.

    Reads the target column for ``category`` from the first sheet of the
    workbook at ``path``, appends it to every row of
    ``parser_result/parser_category/Parser_element.csv`` and writes:

    - ``parser_result/parser_category/Parser_<category>.csv``: element
      ratios plus the target column,
    - ``parser_result/fill_null/<category>_fill_null.csv``: nulls replaced
      by 0,
    - ``parser_result/drop_null/<category>_drop_null.csv``: rows containing
      any null removed.

    It also prints the fraction of null values in the target column.

    Args:
        path: Path of the Excel workbook opened with ``xlrd``.
        category: One of "hardness", "yield_strength", "tensile_strength",
            "elongation", "compressive_strength", "plasticity".

    Raises:
        ValueError: If ``category`` is not one of the supported names.
        IndexError: If the workbook column holds fewer values than the
            element table has rows.
    """
    # Sheet column index of each supported target.
    target_columns = {
        "hardness": 4,
        "yield_strength": 5,
        "tensile_strength": 6,
        "elongation": 7,
        "compressive_strength": 8,
        "plasticity": 9,
    }
    if category not in target_columns:
        # Without this check the output would silently lack a target column.
        raise ValueError("Unknown mechanical category: " + str(category))

    ## For Mechnical Targets.csv
    m_sheet = xlrd.open_workbook(path).sheets()[0]

    # Get the target data of the machine learning model. The first two cells
    # of the column are skipped and the category name becomes the CSV header,
    # lining up with the header row of Parser_element.csv.
    targets = [category] + m_sheet.col_values(target_columns[category])[2:]

    # Save the mechanical properties of alloys.
    # The element table is read from the folder read_data() writes it to.
    category_csv = "parser_result/parser_category/Parser_" + category + ".csv"
    with open("parser_result/parser_category/Parser_element.csv", newline="") as csvFile:
        with open(category_csv, 'w', newline="") as f:
            writer = csv.writer(f)
            for index, row in enumerate(csv.reader(csvFile)):
                row.append(targets[index])
                writer.writerow(row)

    data = pd.read_csv(category_csv)

    null_ratio = data.iloc[:, -1].isnull().mean()
    print("Null ratio in " + category + " dataset is: ", round(null_ratio, 2))

    # Replace null with 0s.
    data.fillna(0).to_csv('parser_result/fill_null/' + category + '_fill_null.csv', index=False)

    # Delete null.
    data.dropna(axis=0, how='any').to_csv('parser_result/drop_null/' + category + '_drop_null.csv', index=False)
|
103 |
+
|
104 |
+
# # Split dataset to knn&rf model.
|
105 |
+
# data = data.fillna(0)
|
106 |
+
# df_test = data.drop(index=data.index)
|
107 |
+
# idx = 0
|
108 |
+
# idx_exit = int(data.shape[0] * 0.07)
|
109 |
+
# for index, row in data.iterrows():
|
110 |
+
# if row.astype(int)[-1] != 0 and idx <= idx_exit:
|
111 |
+
# df_test = df_test.append(row, ignore_index=True)
|
112 |
+
# data = data.drop([index])
|
113 |
+
# idx += 1
|
114 |
+
# df_test.to_csv('parser_result/RF_test/'+category+'_RF_test.csv', index=False)
|
115 |
+
#
|
116 |
+
# # Dealing with rfr_train, split it into knn_train and knn_test.
|
117 |
+
# df_train = pd.DataFrame(data=data)
|
118 |
+
# # Calculate the average number X of data(not 0).
|
119 |
+
# sum_num = 0
|
120 |
+
# num = 0
|
121 |
+
# for index, row in df_train.iterrows():
|
122 |
+
# if row.astype(int)[-1] != 0:
|
123 |
+
# num += 1
|
124 |
+
# sum_num += row.astype(int)[-1]
|
125 |
+
# mean_num = sum_num / num
|
126 |
+
# # df_0: which need to be imputed by KNN.
|
127 |
+
# df_0 = data.drop(index=data.index)
|
128 |
+
# df_pure = data.drop(index=data.index)
|
129 |
+
# for index, row in df_train.iterrows():
|
130 |
+
# if row.astype(int)[-1] == 0:
|
131 |
+
# df_0 = df_0.append(row, ignore_index=True)
|
132 |
+
# else:
|
133 |
+
# df_pure = df_pure.append(row, ignore_index=True)
|
134 |
+
# df_0.to_csv('parser_result/KNN_test/'+category+'_KNN_test.csv', index=False)
|
135 |
+
# df_pure.to_csv('parser_result/KNN_train/' + category + '_KNN_train.csv', index=False)
|
136 |
+
|
137 |
+
|
138 |
+
if __name__ == "__main__":
    # Build the element-ratio table first, then derive one dataset per target.
    read_data("mechanical_composition")
    for target_name in category:
        get_mechnical('mechanical.xls', target_name)
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
Parser/parser_result/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Parser/parser_result/Icon/r
ADDED
File without changes
|
Parser/parser_result/drop_null/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Parser/parser_result/drop_null/Icon/r
ADDED
File without changes
|
Parser/parser_result/fill_null/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Parser/parser_result/fill_null/Icon/r
ADDED
File without changes
|
Parser/parser_result/parser_category/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Parser/parser_result/parser_category/Icon/r
ADDED
File without changes
|