Spaces:

RuijiaTan
/

MultiPrincipalElementAlloyPropertyPredictor

Running

+The Parser works as follows:
+·    Firstly, the Parser reads the chemical formula of an alloy (which is the first column in the mechanical property dataset).
+·    Secondly, it cleans redundant symbols in the chemical formula (such as spaces and brackets).
+·    Thereafter, it normalises the proportions of the elements in each alloy composition so that they sum to 1, and writes the resulting ratio of every element to a CSV file. The CSV file contains the 27 elements that appear in the dataset, along with the proportion of each element for each MPEA.
+·    The final output is utilised in machine learning processes.

Parser/clear_data.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import re
+'''
+Use normalize_molar_ratios() to turn the original ratios to required format.
+For example: [0.5,0.5,0.5,0.5] -> [0.25,0.25,0.25,0.25]
+'''
+def normalize_molar_ratios(ratios):
+    normalized_ratios = list()
+    ele_sum = sum(ratios)
+    for ele in ratios:
+        ele = float(ele / ele_sum)
+        normalized_ratios.append(ele)
+    return normalized_ratios
+'''
+Turn the original alloy string to every element and corresponding content(without normalization).
+For example: 'Ag2Cu3C' -> result_ele = ['Ag','Cu','C'], result__num = ['2','3','1']
+'''
+def clean_row(row: str):
+    #result_ele = re.split(r'[^a-zA-Z]+', row)
+    result_ele = []
+    ## The list s contains all of the elements in an alloy.
+    s = list(''.join(ch for ch in row if ch.isalpha()))
+    ## Get the list of result_ele.
+    for i in range(len(s)-1):
+        # In terms of the
+        if s[i].isupper() and s[i+1].islower():
+            element = str(s[i]+s[i+1])
+            # print(element)
+            result_ele.append(element)
+        if s[i].isupper() and s[i+1].isupper():
+            element = str(s[i])
+            # print(element)
+            result_ele.append(element)
+    if len(s)!=0:
+        ## The last element is an element which is only one letter, for example 'C', add it to the result_ele.
+        if s[-1].isupper():
+            element = str(s[-1])
+            result_ele.append(element)
+    # print("Elements: ", result_ele)
+    ## Get the list of result__num
+    row_list = list(row)
+    # print(row_list)
+    num_list = row_list.copy()
+    index = 1
+    for i in range(len(num_list)-1):
+        # print(num_list[i])
+        ## In terms of the situation "AgAl0.5" and "CAg0.5", add '1' in the middle of elements.
+        if num_list[i].islower() and num_list[i+1].isupper():
+            row_list.insert(i+index,str(1))
+            index+=1
+            # num_list.append(str(1))
+        if num_list[i].isupper() and num_list[i+1].isupper():
+            row_list.insert(i+index,str(1))
+            index+=1
+            # row_list.append(str(1))
+        # elif row_list[i].is
+    if row_list[-1].isalpha():
+        row_list.append(str(1))
+        # print("New row: "+ "".join(row_list))
+    result_num = re.findall(r'-?\d+\.?\d*e?-?\d*?', "".join(row_list))
+    # print(result_num)
+    result__num = list()
+    for i in result_num:
+        float_ratio = float(i)
+        result__num.append(float_ratio)
+    #     print("Ratios: " + str(result__num))
+    #     # print("Dictionary Format: " + str(ele_dic))
+    # print("Element：" + str(result_ele))
+    # print("Content" + str(result_num))
+    # print("---------------------------")
+    return result_ele, result__num

Parser/element.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# Contains 27 elements.
+elements_list = (["Al", "B", "C", "Co", "Cr", "Cu",	"Fe", "Ga",	"Ge",
+                  "Hf",	"Li", "Mg",	"Mn", "Mo",	"N", "Nb","Ni",	"Sc",
+                  "Si",	"Sn", "Ta",	"Ti", "V", "W",	"Y", "Zn", "Zr"])

Parser/main.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import csv
+import os
+import xlrd
+import numpy as np
+from sklearn.impute import SimpleImputer
+import element
+import clear_data
+import pandas as pd
+'''
+The purpose of this file is to read the contents of the dataset,
+normalize the elements, in the file of "parser_category".
+Output three different null values in three folders "drop_null", "fill_null", "interpolate".
+'''
+category = ["compressive_strength","elongation","hardness","plasticity","tensile_strength","yield_strength"]
+def read_data(category):
+    csv_reader = csv.reader(open(category+".csv"))
+    total_row = sum(1 for line in open(category+".csv"))
+    ## Build a new array whose elements are all 0.
+    result = np.zeros(((total_row, len(element.elements_list))), dtype=float)
+    count = 0
+    for alloy in csv_reader:
+        ## interate every line(alloy) in the csv file.
+        alloy_ratio = clear_data.normalize_molar_ratios(clear_data.clean_row(str(alloy[0]))[1])
+        alloy_dic = dict(zip(clear_data.clean_row(str(alloy[0]))[0], alloy_ratio))
+        ## Add the corresponding ratios at the proper location.
+        for key in alloy_dic.keys():
+            result[count, element.elements_list.index(key)] = float(alloy_dic.get(key))
+        count += 1
+    ## Save the result(array) as the 'Parser.csv'
+    err_csv = os.path.join(os.path.expanduser('.'), 'deploy', 'error.csv')
+    with open("parser_result/parser_category/"+"Parser_element.csv", 'w') as f:
+        writer = csv.writer(f)
+        writer.writerow(element.elements_list)
+        count = 0
+        for row in result:
+            writer.writerow(row)
+            count += 1
+def get_mechnical(path,category):
+    ## For Mechnical Targets.csv
+    m_target = xlrd.open_workbook(path)
+    m_sheet = m_target.sheets()[0]
+    # Get the target data of the machine learning model
+    hardness = m_sheet.col_values(4)[2:]
+    hardness.insert(0,"hardness")
+    yield_strength = m_sheet.col_values(5)[2:]
+    yield_strength.insert(0, "yield_strength")
+    tensile_strength = m_sheet.col_values(6)[2:]
+    tensile_strength.insert(0,"tensile_strength")
+    elongation = m_sheet.col_values(7)[2:]
+    elongation.insert(0,"elongation")
+    compressive_strength = m_sheet.col_values(8)[2:]
+    compressive_strength.insert(0,"compressive_strength")
+    plasticity = m_sheet.col_values(9)[2:]
+    plasticity.insert(0,"plasticity")
+    # Save the mechanical properties of alloys.
+    with open("parser_result/Parser_element.csv") as csvFile:
+        rows = csv.reader(csvFile)
+        with open(("parser_result/parser_category/Parser_"+category+".csv"), 'w') as f:
+            writer = csv.writer(f)
+            index = 0
+            for row in rows:
+                if category=="hardness":
+                    row.append(hardness[index])
+                elif category=="yield_strength":
+                    row.append(yield_strength[index])
+                elif category == "tensile_strength":
+                    row.append(tensile_strength[index])
+                elif category == "elongation":
+                    row.append(elongation[index])
+                elif category == "compressive_strength":
+                    row.append(compressive_strength[index])
+                elif category == "plasticity":
+                    row.append(plasticity[index])
+                writer.writerow(row)
+                index += 1
+        data = pd.read_csv('parser_result/parser_category/Parser_'+category+'.csv')
+        last_column = data.iloc[:, -1]
+        null_ratio = last_column.isnull().mean()
+        print("Null ratio in " + category +"dataset is: ", round(null_ratio,2))
+        # Replace null with 0s.
+        data_fillna = data.fillna(0)
+        df1 = pd.DataFrame(data=data_fillna)
+        df1.to_csv('parser_result/fill_null/'+category+'_fill_null.csv', index=False)
+        # Delete null.
+        data_dropna = data.dropna(axis=0, how='any')
+        df1 = pd.DataFrame(data=data_dropna)
+        df1.to_csv('parser_result/drop_null/'+category+'_drop_null.csv', index=False)
+        # # Split dataset to knn&rf model.
+        # data = data.fillna(0)
+        # df_test = data.drop(index=data.index)
+        # idx = 0
+        # idx_exit = int(data.shape[0] * 0.07)
+        # for index, row in data.iterrows():
+        #     if row.astype(int)[-1] != 0 and idx <= idx_exit:
+        #         df_test = df_test.append(row, ignore_index=True)
+        #         data = data.drop([index])
+        #         idx += 1
+        # df_test.to_csv('parser_result/RF_test/'+category+'_RF_test.csv', index=False)
+        #
+        # # Dealing with rfr_train, split it into knn_train and knn_test.
+        # df_train = pd.DataFrame(data=data)
+        # # Calculate the average number X of data(not 0).
+        # sum_num = 0
+        # num = 0
+        # for index, row in df_train.iterrows():
+        #     if row.astype(int)[-1] != 0:
+        #         num += 1
+        #         sum_num += row.astype(int)[-1]
+        # mean_num = sum_num / num
+        # # df_0: which need to be imputed by KNN.
+        # df_0 = data.drop(index=data.index)
+        # df_pure = data.drop(index=data.index)
+        # for index, row in df_train.iterrows():
+        #     if row.astype(int)[-1] == 0:
+        #         df_0 = df_0.append(row, ignore_index=True)
+        #     else:
+        #         df_pure = df_pure.append(row, ignore_index=True)
+        # df_0.to_csv('parser_result/KNN_test/'+category+'_KNN_test.csv', index=False)
+        # df_pure.to_csv('parser_result/KNN_train/' + category + '_KNN_train.csv', index=False)
+if __name__ =="__main__":
+    read_data("mechanical_composition")
+    for c in category:
+        get_mechnical('mechanical.xls', c)

Parser/parser_result/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

Parser/parser_result/Icon/r ADDED Viewed

File without changes

Parser/parser_result/drop_null/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

Parser/parser_result/drop_null/Icon/r ADDED Viewed

File without changes

Parser/parser_result/fill_null/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

Parser/parser_result/fill_null/Icon/r ADDED Viewed

File without changes

Parser/parser_result/parser_category/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

Parser/parser_result/parser_category/Icon/r ADDED Viewed

File without changes