Spaces:

RuijiaTan
/

MultiPrincipalElementAlloyPropertyPredictor

Running

App Files Files Community

MultiPrincipalElementAlloyPropertyPredictor / Parser /main.py

RuijiaTan

Upload 14 files

c66f952 about 1 year ago

raw

history blame

5.68 kB

	import csv
	import os
	import xlrd
	import numpy as np
	from sklearn.impute import SimpleImputer

	import element
	import clear_data
	import pandas as pd

	'''
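	This script reads the alloy composition dataset, normalizes the molar
	ratios of the elements in each alloy, and writes the resulting element
	matrix into the "parser_category" folder.

	For each mechanical property it then outputs versions of the dataset that
	treat null values in three different ways, in the three folders
	"drop_null", "fill_null" and "interpolate".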
	'''

	# Names of the mechanical properties handled by this script; each one is
	# passed to get_mechnical() by the __main__ block.
	category = [
	    "compressive_strength",
	    "elongation",
	    "hardness",
	    "plasticity",
	    "tensile_strength",
	    "yield_strength",
	]

	def read_data(category: str) -> None:
	    """Convert the alloy list in ``<category>.csv`` into an element-ratio matrix.

	    The first column of every input row is passed through
	    ``clear_data.clean_row``; part ``[1]`` of its result is normalized with
	    ``clear_data.normalize_molar_ratios`` and paired with part ``[0]``.
	    Each ratio is stored in the column of ``element.elements_list`` that
	    matches its name; all other cells stay 0.  The matrix is written, below
	    a header row containing ``element.elements_list``, to
	    ``parser_result/parser_category/Parser_element.csv``.

	    Args:
	        category: Base name (without the ``.csv`` suffix) of the input file.
	            Note that this parameter shadows the module-level ``category``
	            list inside the function.

	    Raises:
	        OSError: If the input file cannot be read or the output file cannot
	            be written.
	        ValueError: If a cleaned name is not found in
	            ``element.elements_list`` (assuming it is a plain list).
	    """
	    # Read the whole input once inside a context manager so the handle is
	    # closed deterministically instead of being left to the garbage collector.
	    with open(category + ".csv", newline="") as src:
	        alloys = list(csv.reader(src))

	    columns = element.elements_list

	    # One row per alloy, one column per known element, initialised to 0.
	    result = np.zeros((len(alloys), len(columns)), dtype=float)

	    for row_idx, alloy in enumerate(alloys):
	        # clean_row() is evaluated once per alloy; presumably [0] holds the
	        # element names and [1] the raw ratios (inferred from how the two
	        # parts are zipped together) -- TODO confirm against clear_data.
	        cleaned = clear_data.clean_row(str(alloy[0]))
	        ratios = clear_data.normalize_molar_ratios(cleaned[1])

	        # Put each ratio at the column belonging to its element.
	        for name, ratio in dict(zip(cleaned[0], ratios)).items():
	            result[row_idx, columns.index(name)] = float(ratio)

	    # newline="" lets the csv module control line endings; without it every
	    # record is followed by an empty line on Windows.
	    with open("parser_result/parser_category/" + "Parser_element.csv", "w", newline="") as out:
	        writer = csv.writer(out)
	        writer.writerow(columns)
	        writer.writerows(result)

	def get_mechnical(path: str, category: str) -> None:
	    """Attach one mechanical-property column to the element matrix and export it.

	    The target values are taken from the first sheet of the workbook at
	    ``path``.  They are appended, row by row, to the contents of
	    ``parser_result/Parser_element.csv`` and saved as
	    ``parser_result/parser_category/Parser_<category>.csv``.  That file is
	    then reloaded with pandas, the share of nulls in its last column is
	    printed, and two variants are written:

	    * ``parser_result/fill_null/<category>_fill_null.csv`` -- nulls replaced by 0
	    * ``parser_result/drop_null/<category>_drop_null.csv`` -- rows containing
	      any null removed

	    Args:
	        path: Path of the workbook opened with ``xlrd.open_workbook``.
	        category: One of ``hardness``, ``yield_strength``,
	            ``tensile_strength``, ``elongation``, ``compressive_strength``
	            or ``plasticity``.

	    Raises:
	        ValueError: If ``category`` is not one of the names listed above.
	        IndexError: If the sheet column holds fewer values than the element
	            CSV has rows.
	    """
	    # Zero-based sheet column that stores each property.
	    target_columns = {
	        "hardness": 4,
	        "yield_strength": 5,
	        "tensile_strength": 6,
	        "elongation": 7,
	        "compressive_strength": 8,
	        "plasticity": 9,
	    }
	    if category not in target_columns:
	        # Fail loudly: otherwise no target column would be appended and the
	        # null statistics below would be computed on an element column.
	        raise ValueError(
	            "Unknown category %r; expected one of %s"
	            % (category, sorted(target_columns))
	        )

	    # Only the requested column is read from the workbook.
	    m_sheet = xlrd.open_workbook(path).sheets()[0]

	    # The first two sheet rows are skipped (presumably header rows -- TODO
	    # confirm) and the property name is placed in front, so targets[i]
	    # lines up with row i of the element CSV, whose row 0 is its header.
	    targets = [category] + m_sheet.col_values(target_columns[category])[2:]

	    # NOTE(review): read_data() writes its matrix to
	    # parser_result/parser_category/Parser_element.csv, not to the path read
	    # here -- confirm which location is intended.
	    element_csv = "parser_result/Parser_element.csv"
	    target_csv = "parser_result/parser_category/Parser_" + category + ".csv"

	    # newline="" lets the csv module control line endings (no blank rows on
	    # Windows).  Both files are closed before pandas reads the result back.
	    with open(element_csv, newline="") as src:
	        with open(target_csv, "w", newline="") as dst:
	            writer = csv.writer(dst)
	            for index, row in enumerate(csv.reader(src)):
	                row.append(targets[index])
	                writer.writerow(row)

	    data = pd.read_csv(target_csv)

	    # The last column is the target that was just appended.
	    null_ratio = data.iloc[:, -1].isnull().mean()
	    print("Null ratio in " + category +"dataset is: ", round(null_ratio,2))

	    # Replace nulls with 0.
	    data.fillna(0).to_csv('parser_result/fill_null/'+category+'_fill_null.csv', index=False)

	    # Drop every row that contains a null.
	    data.dropna(axis=0, how='any').to_csv('parser_result/drop_null/'+category+'_drop_null.csv', index=False)

	# # Split dataset to knn&rf model.
	# data = data.fillna(0)
	# df_test = data.drop(index=data.index)
	# idx = 0
	# idx_exit = int(data.shape[0] * 0.07)
	# for index, row in data.iterrows():
	# if row.astype(int)[-1] != 0 and idx <= idx_exit:
	# df_test = df_test.append(row, ignore_index=True)
	# data = data.drop([index])
	# idx += 1
	# df_test.to_csv('parser_result/RF_test/'+category+'_RF_test.csv', index=False)
	#
	# # Dealing with rfr_train, split it into knn_train and knn_test.
	# df_train = pd.DataFrame(data=data)
	# # Calculate the average number X of data(not 0).
	# sum_num = 0
	# num = 0
	# for index, row in df_train.iterrows():
	# if row.astype(int)[-1] != 0:
	# num += 1
	# sum_num += row.astype(int)[-1]
	# mean_num = sum_num / num
	# # df_0: which need to be imputed by KNN.
	# df_0 = data.drop(index=data.index)
	# df_pure = data.drop(index=data.index)
	# for index, row in df_train.iterrows():
	# if row.astype(int)[-1] == 0:
	# df_0 = df_0.append(row, ignore_index=True)
	# else:
	# df_pure = df_pure.append(row, ignore_index=True)
	# df_0.to_csv('parser_result/KNN_test/'+category+'_KNN_test.csv', index=False)
	# df_pure.to_csv('parser_result/KNN_train/' + category + '_KNN_train.csv', index=False)


	if __name__ == "__main__":
	    # Step 1: build the element-ratio matrix from the composition file
	    # "mechanical_composition.csv".
	    read_data("mechanical_composition")

	    # Step 2: export one dataset per mechanical property listed in `category`,
	    # taking the target values from the "mechanical.xls" workbook.
	    for property_name in category:
	        get_mechnical("mechanical.xls", property_name)