import re

# One numeric token: optional leading minus, integer digits, optional
# fractional part, and an optional lowercase-"e" exponent.  The exponent is
# all-or-nothing, so a stray "e" or "-" after a number (e.g. the hyphen in
# "Fe2-Cr3") is never glued onto it and every match is parseable by float().
_NUMBER_RE = re.compile(r'-?\d+\.?\d*(?:e-?\d+)?')


def normalize_molar_ratios(ratios):
    """Scale ``ratios`` so that the values sum to 1.

    Example: ``[0.5, 0.5, 0.5, 0.5] -> [0.25, 0.25, 0.25, 0.25]``

    Args:
        ratios: Sequence of numeric molar ratios.  It is traversed twice
            (once to sum, once to scale), so pass a list/tuple rather than
            a one-shot iterator.

    Returns:
        A new list of floats, each original value divided by the total.
        An empty input yields an empty list.

    Raises:
        ZeroDivisionError: If the input is non-empty and its built-in
            numeric values sum to zero.
    """
    total = sum(ratios)
    return [float(value / total) for value in ratios]


def clean_row(row: str):
    """Split an alloy formula into element symbols and their raw contents.

    Example: ``'Ag2Cu3C' -> (['Ag', 'Cu', 'C'], [2.0, 3.0, 1.0])``

    The contents are returned as written in the formula (not normalized).

    How the two lists are derived:

    * Symbols are read from the letters of ``row`` only (every non-letter is
      discarded first): an uppercase letter followed by a lowercase letter is
      a two-letter symbol, an uppercase letter followed by another uppercase
      letter (or ending the string) is a one-letter symbol.
    * An implicit content of ``1`` is supplied where a cased letter is
      directly followed by an uppercase letter (``'AgAl0.5'``, ``'CAg0.5'``)
      and where the string ends in a letter.  Symbols separated by other
      characters without an explicit number (e.g. ``'Fe-Cr'``) do not get an
      implicit ``1``, so for such input the lists can differ in length.

    Args:
        row: Alloy formula string, e.g. ``'Ag2Cu3C'``.

    Returns:
        A tuple ``(elements, contents)`` where ``elements`` is a list of
        symbol strings and ``contents`` is a list of floats.  An empty
        ``row`` yields ``([], [])``.
    """
    if not row:
        # Nothing to parse; indexing the last character below would fail.
        return [], []

    # --- Element symbols -------------------------------------------------
    letters = [ch for ch in row if ch.isalpha()]
    elements = []
    for pos, letter in enumerate(letters):
        if not letter.isupper():
            # Lowercase letters are only ever the tail of a symbol.
            continue
        if pos + 1 == len(letters):
            # Trailing uppercase letter: a one-letter symbol such as 'C'.
            elements.append(letter)
        elif letters[pos + 1].islower():
            elements.append(letter + letters[pos + 1])
        elif letters[pos + 1].isupper():
            elements.append(letter)

    # --- Contents --------------------------------------------------------
    # Rebuild the formula with an explicit "1" wherever the content is
    # implied, so that a single regex pass can then pick up every number.
    pieces = []
    for current, following in zip(row, row[1:]):
        pieces.append(current)
        if (current.islower() or current.isupper()) and following.isupper():
            pieces.append('1')
    pieces.append(row[-1])
    if row[-1].isalpha():
        pieces.append('1')

    contents = [float(token) for token in _NUMBER_RE.findall(''.join(pieces))]
    return elements, contents