RuijiaTan committed on
Commit
c66f952
1 Parent(s): 4265f7a

Upload 14 files


Upload Parser files.

Parser/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Parser/Icon/r ADDED
File without changes
Parser/Readme.md ADDED
@@ -0,0 +1,9 @@
+ The principle of the Parser is as follows:
+
+ · First, the Parser reads the chemical formula of an alloy (the first column in the mechanical property dataset).
+
+ · Second, it cleans redundant symbols from the chemical formula (such as spaces and brackets).
+
+ · It then normalises the proportions of the different elements in the alloy composition, assigns the ratio to each corresponding element, and outputs a CSV file. The CSV file contains the 27 elements that appear in the dataset, along with the proportion of each element for every MPEA.
+
+ · The final output is used in the machine learning processes.
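A minimal sketch of the flow this README describes, assuming it is run from the Parser/ directory so that the clear_data.py and element.py modules added in this commit are importable; the formula string is only an illustrative example.

```python
# Sketch of the README flow: parse one formula, normalise it, and place the
# ratios in a 27-column row (one column per element in elements_list).
import clear_data   # clean_row(), normalize_molar_ratios() from this commit
import element      # elements_list (27 symbols) from this commit

formula = "AlCoCrFeNi2.1"                      # e.g. first column of the dataset
symbols, amounts = clear_data.clean_row(formula)
ratios = clear_data.normalize_molar_ratios(amounts)

row = [0.0] * len(element.elements_list)
for sym, ratio in zip(symbols, ratios):
    row[element.elements_list.index(sym)] = ratio

print(dict(zip(symbols, ratios)))              # {'Al': 0.16..., ..., 'Ni': 0.34...}
```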
Parser/clear_data.py ADDED
@@ -0,0 +1,79 @@
+ import re
+
+ '''
+ normalize_molar_ratios() rescales the raw element amounts so that they sum to 1.
+ For example: [0.5, 0.5, 0.5, 0.5] -> [0.25, 0.25, 0.25, 0.25]
+ '''
+ def normalize_molar_ratios(ratios):
+     normalized_ratios = list()
+     ele_sum = sum(ratios)
+     for ele in ratios:
+         ele = float(ele / ele_sum)
+         normalized_ratios.append(ele)
+     return normalized_ratios
+
+
+ '''
+ clean_row() splits the original alloy string into element symbols and their raw amounts (without normalisation).
+ For example: 'Ag2Cu3C' -> result_ele = ['Ag', 'Cu', 'C'], result__num = [2.0, 3.0, 1.0]
+ '''
+ def clean_row(row: str):
+     result_ele = []
+
+     ## The list s contains all of the letters in the alloy string.
+     s = list(''.join(ch for ch in row if ch.isalpha()))
+
+     ## Build result_ele, the list of element symbols.
+     for i in range(len(s) - 1):
+         ## Two-letter symbol (e.g. 'Ag'): an upper-case letter followed by a lower-case letter.
+         if s[i].isupper() and s[i+1].islower():
+             element = str(s[i] + s[i+1])
+             result_ele.append(element)
+         ## One-letter symbol (e.g. 'C'): an upper-case letter followed by another upper-case letter.
+         if s[i].isupper() and s[i+1].isupper():
+             element = str(s[i])
+             result_ele.append(element)
+
+     if len(s) != 0:
+         ## If the last letter is upper case, it is a one-letter element (for example 'C'); add it to result_ele.
+         if s[-1].isupper():
+             element = str(s[-1])
+             result_ele.append(element)
+
+     ## Build result__num, the list of raw amounts.
+     row_list = list(row)
+     num_list = row_list.copy()
+     index = 1
+     for i in range(len(num_list) - 1):
+         ## For cases such as "AgAl0.5" and "CAg0.5", insert a '1' between elements that carry no explicit amount.
+         if num_list[i].islower() and num_list[i+1].isupper():
+             row_list.insert(i + index, str(1))
+             index += 1
+         if num_list[i].isupper() and num_list[i+1].isupper():
+             row_list.insert(i + index, str(1))
+             index += 1
+     ## If the string ends with a letter, the last element also has an implicit amount of 1.
+     if row_list[-1].isalpha():
+         row_list.append(str(1))
+
+     result_num = re.findall(r'-?\d+\.?\d*e?-?\d*?', "".join(row_list))
+     result__num = list()
+     for i in result_num:
+         float_ratio = float(i)
+         result__num.append(float_ratio)
+
+     return result_ele, result__num
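As a quick check of the two helpers above, a hypothetical interactive session (run from the Parser/ directory) reproducing the docstring example:

```python
from clear_data import clean_row, normalize_molar_ratios

symbols, amounts = clean_row("Ag2Cu3C")
print(symbols)                           # ['Ag', 'Cu', 'C']
print(amounts)                           # [2.0, 3.0, 1.0]
print(normalize_molar_ratios(amounts))   # [0.333..., 0.5, 0.166...]
```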
Parser/element.py ADDED
@@ -0,0 +1,5 @@
+ # Contains 27 elements.
+ elements_list = ["Al", "B", "C", "Co", "Cr", "Cu", "Fe", "Ga", "Ge",
+                  "Hf", "Li", "Mg", "Mn", "Mo", "N", "Nb", "Ni", "Sc",
+                  "Si", "Sn", "Ta", "Ti", "V", "W", "Y", "Zn", "Zr"]
+
Parser/main.py ADDED
@@ -0,0 +1,146 @@
+ import csv
+ import os
+ import xlrd
+ import numpy as np
+ from sklearn.impute import SimpleImputer
+
+ import element
+ import clear_data
+ import pandas as pd
+
+ '''
+ Read the alloy compositions from the dataset, normalise the element ratios,
+ and write the parsed features to the "parser_category" folder.
+
+ Null values in the target column are handled in different ways and written to
+ the folders "drop_null", "fill_null" and "interpolate".
+ '''
+
+ category = ["compressive_strength", "elongation", "hardness", "plasticity", "tensile_strength", "yield_strength"]
+
+ def read_data(category):
+     csv_reader = csv.reader(open(category + ".csv"))
+     total_row = sum(1 for line in open(category + ".csv"))
+
+     ## Build a new array whose entries are all 0.
+     result = np.zeros((total_row, len(element.elements_list)), dtype=float)
+     count = 0
+     for alloy in csv_reader:
+         ## Iterate over every line (alloy) in the csv file.
+         alloy_ratio = clear_data.normalize_molar_ratios(clear_data.clean_row(str(alloy[0]))[1])
+         alloy_dic = dict(zip(clear_data.clean_row(str(alloy[0]))[0], alloy_ratio))
+
+         ## Add the corresponding ratios at the proper locations.
+         for key in alloy_dic.keys():
+             result[count, element.elements_list.index(key)] = float(alloy_dic.get(key))
+         count += 1
+
+     ## Save the result array as 'Parser_element.csv'.
+     with open("parser_result/parser_category/" + "Parser_element.csv", 'w') as f:
+         writer = csv.writer(f)
+         writer.writerow(element.elements_list)
+         count = 0
+         for row in result:
+             writer.writerow(row)
+             count += 1
+
+ def get_mechnical(path, category):
+     ## Read the mechanical targets workbook.
+     m_target = xlrd.open_workbook(path)
+     m_sheet = m_target.sheets()[0]
+
+     # Get the target data for the machine learning model.
+     hardness = m_sheet.col_values(4)[2:]
+     hardness.insert(0, "hardness")
+     yield_strength = m_sheet.col_values(5)[2:]
+     yield_strength.insert(0, "yield_strength")
+     tensile_strength = m_sheet.col_values(6)[2:]
+     tensile_strength.insert(0, "tensile_strength")
+     elongation = m_sheet.col_values(7)[2:]
+     elongation.insert(0, "elongation")
+     compressive_strength = m_sheet.col_values(8)[2:]
+     compressive_strength.insert(0, "compressive_strength")
+     plasticity = m_sheet.col_values(9)[2:]
+     plasticity.insert(0, "plasticity")
+
+     # Save the selected mechanical property of each alloy alongside its composition.
+     with open("parser_result/Parser_element.csv") as csvFile:
+         rows = csv.reader(csvFile)
+         with open(("parser_result/parser_category/Parser_" + category + ".csv"), 'w') as f:
+             writer = csv.writer(f)
+             index = 0
+             for row in rows:
+                 if category == "hardness":
+                     row.append(hardness[index])
+                 elif category == "yield_strength":
+                     row.append(yield_strength[index])
+                 elif category == "tensile_strength":
+                     row.append(tensile_strength[index])
+                 elif category == "elongation":
+                     row.append(elongation[index])
+                 elif category == "compressive_strength":
+                     row.append(compressive_strength[index])
+                 elif category == "plasticity":
+                     row.append(plasticity[index])
+                 writer.writerow(row)
+                 index += 1
+     data = pd.read_csv('parser_result/parser_category/Parser_' + category + '.csv')
+
+     last_column = data.iloc[:, -1]
+     null_ratio = last_column.isnull().mean()
+     print("Null ratio in " + category + " dataset is: ", round(null_ratio, 2))
+
+     # Replace nulls with 0s.
+     data_fillna = data.fillna(0)
+     df1 = pd.DataFrame(data=data_fillna)
+     df1.to_csv('parser_result/fill_null/' + category + '_fill_null.csv', index=False)
+
+     # Delete rows that contain nulls.
+     data_dropna = data.dropna(axis=0, how='any')
+     df1 = pd.DataFrame(data=data_dropna)
+     df1.to_csv('parser_result/drop_null/' + category + '_drop_null.csv', index=False)
+
+     # # Split the dataset for the KNN & RF models.
+     # data = data.fillna(0)
+     # df_test = data.drop(index=data.index)
+     # idx = 0
+     # idx_exit = int(data.shape[0] * 0.07)
+     # for index, row in data.iterrows():
+     #     if row.astype(int)[-1] != 0 and idx <= idx_exit:
+     #         df_test = df_test.append(row, ignore_index=True)
+     #         data = data.drop([index])
+     #         idx += 1
+     # df_test.to_csv('parser_result/RF_test/' + category + '_RF_test.csv', index=False)
+     #
+     # # Deal with rfr_train: split it into knn_train and knn_test.
+     # df_train = pd.DataFrame(data=data)
+     # # Calculate the mean of the non-zero target values.
+     # sum_num = 0
+     # num = 0
+     # for index, row in df_train.iterrows():
+     #     if row.astype(int)[-1] != 0:
+     #         num += 1
+     #         sum_num += row.astype(int)[-1]
+     # mean_num = sum_num / num
+     # # df_0: rows whose target needs to be imputed by KNN.
+     # df_0 = data.drop(index=data.index)
+     # df_pure = data.drop(index=data.index)
+     # for index, row in df_train.iterrows():
+     #     if row.astype(int)[-1] == 0:
+     #         df_0 = df_0.append(row, ignore_index=True)
+     #     else:
+     #         df_pure = df_pure.append(row, ignore_index=True)
+     # df_0.to_csv('parser_result/KNN_test/' + category + '_KNN_test.csv', index=False)
+     # df_pure.to_csv('parser_result/KNN_train/' + category + '_KNN_train.csv', index=False)
+
+
+ if __name__ == "__main__":
+     read_data("mechanical_composition")
+     for c in category:
+         get_mechnical('mechanical.xls', c)
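A small, self-contained illustration of the null handling used in get_mechnical() above, on an invented two-column frame (the values are made up for the example):

```python
import pandas as pd

df = pd.DataFrame({"Al": [0.5, 0.2, 0.25],
                   "hardness": [500.0, None, 320.0]})

print(round(df.iloc[:, -1].isnull().mean(), 2))  # 0.33 -> the reported null ratio
print(df.fillna(0))                              # -> *_fill_null.csv (missing target becomes 0)
print(df.dropna(axis=0, how="any"))              # -> *_drop_null.csv (row with missing target removed)
```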
Parser/parser_result/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Parser/parser_result/Icon/r ADDED
File without changes
Parser/parser_result/drop_null/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Parser/parser_result/drop_null/Icon/r ADDED
File without changes
Parser/parser_result/fill_null/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Parser/parser_result/fill_null/Icon/r ADDED
File without changes
Parser/parser_result/parser_category/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Parser/parser_result/parser_category/Icon/r ADDED
File without changes