import argparse
import os
import sys
import warnings
import joblib
# Alias so that joblib can unpickle models saved with the old
# 'sklearn.externals.joblib' import path.
sys.modules['sklearn.externals.joblib'] = joblib
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import MACCSkeys
from xgboost.sklearn import XGBRegressor
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import lightgbm as lgb
from sklearn.ensemble import RandomForestRegressor
import wget  # only used by the commented-out model download lines in load_model()
import gradio as gr
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
import PIL.Image as Image

warnings.filterwarnings("ignore")
# SMILES of the five eluent solvents, in the same order as the PE, EA, DCM, MeOH, Et2O ratios:
# n-hexane (representing petroleum ether), ethyl acetate, dichloromethane, methanol, diethyl ether.
Eluent_smiles = ['CCCCCC', 'CC(OCC)=O', 'C(Cl)Cl', 'CO', 'CCOCC']
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', type=str, default=os.path.join(os.getcwd(), 'TLC_dataset.xlsx'),
                        help='path of the downloaded dataset')
    parser.add_argument('--dipole_path', type=str, default=os.path.join(os.getcwd(), 'compound_list_带化合物分类.xlsx'),
                        help='path of the dipole file')
    parser.add_argument('--data_range', type=int, default=4944,
                        help='utilized data range, robot: 4114, manual: 4458, new: 4944')
    parser.add_argument('--automatic_divide', type=bool, default=False,
                        help='automatically divide the dataset into 80%% train, 10%% validate and 10%% test')
    parser.add_argument('--choose_total', type=int, default=387, help='total train num, robot: 387, manual: 530')
    parser.add_argument('--choose_train', type=int, default=308, help='train num, robot: 387, manual: 530')
    parser.add_argument('--choose_validate', type=int, default=38, help='validate num')
    parser.add_argument('--choose_test', type=int, default=38, help='test num')
    parser.add_argument('--seed', type=int, default=324, help='random seed for the dataset split')
    parser.add_argument('--torch_seed', type=int, default=324, help='random seed for torch')
    parser.add_argument('--add_dipole', type=bool, default=True, help='add dipole into the dataset')
    parser.add_argument('--add_molecular_descriptors', type=bool, default=True,
                        help='add molecular descriptors (molecular weight (MW), topological polar surface area (TPSA), '
                             'number of rotatable bonds (NROTB), number of H-bond donors (HBD), '
                             'number of H-bond acceptors (HBA), octanol-water partition coefficient (LogP)) into the dataset')
    parser.add_argument('--add_MACCkeys', type=bool, default=True, help='add MACCS keys into the dataset')
    parser.add_argument('--add_eluent_matrix', type=bool, default=True, help='add the eluent matrix into the dataset')
    parser.add_argument('--test_mode', type=str, default='robot', help='manual data, robot data, or fix (custom test data)')
    parser.add_argument('--use_model', type=str, default='Ensemble', help='the utilized model (XGB, LGB, ANN, RF, Ensemble, Bayesian)')
    parser.add_argument('--download_data', type=bool, default=False, help='download the dataset instead of using a local copy')
    parser.add_argument('--use_sigmoid', type=bool, default=True, help='use sigmoid')
    parser.add_argument('--shuffle_array', type=bool, default=True, help='shuffle_array')
    parser.add_argument('--characterization_mode', type=str, default='standard',
                        help='the characterization mode for the dataset, including standard, precise_TPSA, no_multi')
    # --------------- parameters for plot ---------------------
    parser.add_argument('--plot_col_num', type=int, default=4, help='the col num in plot')
    parser.add_argument('--plot_row_num', type=int, default=4, help='the row num in plot')
    parser.add_argument('--plot_importance_num', type=int, default=10, help='the max importance num in plot')
    # --------------- parameters for LGB ----------------------
    parser.add_argument('--LGB_max_depth', type=int, default=5, help='max_depth for LGB')
    parser.add_argument('--LGB_num_leaves', type=int, default=25, help='num_leaves for LGB')
    parser.add_argument('--LGB_learning_rate', type=float, default=0.007, help='learning_rate for LGB')
    parser.add_argument('--LGB_n_estimators', type=int, default=1000, help='n_estimators for LGB')
    parser.add_argument('--LGB_early_stopping_rounds', type=int, default=200, help='early_stopping_rounds for LGB')
    # --------------- parameters for XGB ----------------------
    parser.add_argument('--XGB_n_estimators', type=int, default=200, help='n_estimators for XGB')
    parser.add_argument('--XGB_max_depth', type=int, default=3, help='max_depth for XGB')
    parser.add_argument('--XGB_learning_rate', type=float, default=0.1, help='learning_rate for XGB')
    # --------------- parameters for RF -----------------------
    parser.add_argument('--RF_n_estimators', type=int, default=1000, help='n_estimators for RF')
    parser.add_argument('--RF_random_state', type=int, default=1, help='random_state for RF')
    parser.add_argument('--RF_n_jobs', type=int, default=1, help='n_jobs for RF')
    # --------------- parameters for ANN ----------------------
    parser.add_argument('--NN_hidden_neuron', type=int, default=128, help='hidden neurons for NN')
    parser.add_argument('--NN_optimizer', type=str, default='Adam', help='optimizer for NN (Adam, SGD, RMSprop)')
    parser.add_argument('--NN_lr', type=float, default=0.005, help='learning rate for NN')
    parser.add_argument('--NN_model_save_location', type=str, default=os.path.join(os.getcwd(), 'model_save_NN'),
                        help='save location for the trained NN model')
    parser.add_argument('--NN_max_epoch', type=int, default=5000, help='max training epochs for NN')
    parser.add_argument('--NN_add_sigmoid', type=bool, default=True, help='whether to add a sigmoid in the NN')
    parser.add_argument('--NN_add_PINN', type=bool, default=False, help='whether to add a PINN loss in the NN')
    parser.add_argument('--NN_epi', type=float, default=100.0, help='the coefficient of the PINN loss in the NN')
    config = parser.parse_args()
    config.device = 'cpu'
    return config
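# Note: the Gradio callbacks below (predict_single, predict_xlsx, predict_compare)
# call parse_args() and then set config.add_dipole = False, so inference uses only
# the MACCS keys, eluent descriptors and molecular descriptors; config.device is 'cpu'.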
class ANN(nn.Module):
    '''
    Construct the artificial neural network.
    Note: the same hidden_layer is applied three times in forward(), so the three
    hidden passes share one weight matrix; keep this structure so the pretrained
    state_dict loaded in Model_ML.load_model() still matches.
    '''
    def __init__(self, in_neuron, hidden_neuron, out_neuron, config):
        super(ANN, self).__init__()
        self.input_layer = nn.Linear(in_neuron, hidden_neuron)
        self.hidden_layer = nn.Linear(hidden_neuron, hidden_neuron)
        self.output_layer = nn.Linear(hidden_neuron, out_neuron)
        self.NN_add_sigmoid = config.NN_add_sigmoid

    def forward(self, x):
        x = self.input_layer(x)
        x = F.leaky_relu(x)
        x = self.hidden_layer(x)
        x = F.leaky_relu(x)
        x = self.hidden_layer(x)
        x = F.leaky_relu(x)
        x = self.hidden_layer(x)
        x = F.leaky_relu(x)
        x = self.output_layer(x)
        if self.NN_add_sigmoid:
            x = torch.sigmoid(x)  # torch.sigmoid replaces the deprecated F.sigmoid
        return x
class Model_ML():
    def __init__(self, config, X_test):
        super(Model_ML, self).__init__()
        self.X_test = X_test
        self.seed = config.seed
        self.torch_seed = config.torch_seed
        self.config = config
        self.add_dipole = config.add_dipole
        self.add_molecular_descriptors = config.add_molecular_descriptors
        self.add_eluent_matrix = config.add_eluent_matrix
        self.use_sigmoid = config.use_sigmoid

        self.use_model = config.use_model
        self.LGB_max_depth = config.LGB_max_depth
        self.LGB_num_leaves = config.LGB_num_leaves
        self.LGB_learning_rate = config.LGB_learning_rate
        self.LGB_n_estimators = config.LGB_n_estimators
        self.LGB_early_stopping_rounds = config.LGB_early_stopping_rounds
        self.XGB_n_estimators = config.XGB_n_estimators
        self.XGB_max_depth = config.XGB_max_depth
        self.XGB_learning_rate = config.XGB_learning_rate
        self.RF_n_estimators = config.RF_n_estimators
        self.RF_random_state = config.RF_random_state
        self.RF_n_jobs = config.RF_n_jobs

        self.NN_hidden_neuron = config.NN_hidden_neuron
        self.NN_optimizer = config.NN_optimizer
        self.NN_lr = config.NN_lr
        self.NN_model_save_location = config.NN_model_save_location
        self.NN_max_epoch = config.NN_max_epoch
        self.NN_add_PINN = config.NN_add_PINN
        self.NN_epi = config.NN_epi
        self.device = config.device

        self.plot_row_num = config.plot_row_num
        self.plot_col_num = config.plot_col_num
        self.plot_importance_num = config.plot_importance_num
    def load_model(self):
        # The freshly constructed tree models below only define the architectures;
        # they are immediately replaced by the pretrained models loaded from disk.
        model_LGB = lgb.LGBMRegressor(objective='regression', max_depth=self.LGB_max_depth,
                                      num_leaves=self.LGB_num_leaves,
                                      learning_rate=self.LGB_learning_rate, n_estimators=self.LGB_n_estimators)
        model_XGB = XGBRegressor(seed=self.seed,
                                 n_estimators=self.XGB_n_estimators,
                                 max_depth=self.XGB_max_depth,
                                 eval_metric='rmse',
                                 learning_rate=self.XGB_learning_rate,
                                 min_child_weight=1,
                                 subsample=1,
                                 colsample_bytree=1,
                                 colsample_bylevel=1,
                                 gamma=0)
        model_RF = RandomForestRegressor(n_estimators=self.RF_n_estimators,
                                         criterion='mse',
                                         random_state=self.RF_random_state,
                                         n_jobs=self.RF_n_jobs)
        Net = ANN(self.X_test.shape[1], self.NN_hidden_neuron, 1, config=self.config).to(self.device)
        # The pretrained weights can be downloaded from the Hugging Face repository:
        # wget.download('https://huggingface.co/woshixuhao/Rf_prediction/resolve/main/model_LGB.pkl')
        # wget.download('https://huggingface.co/woshixuhao/Rf_prediction/resolve/main/model_XGB.pkl')
        # wget.download('https://huggingface.co/woshixuhao/Rf_prediction/resolve/main/model_RF.pkl')
        # wget.download('https://huggingface.co/woshixuhao/Rf_prediction/resolve/main/model_ANN.pkl')
        model_LGB = joblib.load('model_LGB.pkl')
        model_XGB = joblib.load('model_XGB.pkl')
        model_RF = joblib.load('model_RF.pkl')
        Net.load_state_dict(torch.load('model_ANN.pkl', map_location=torch.device('cpu')))
        return model_LGB, model_XGB, model_RF, Net
    def get_Rf(self):
        model_LGB, model_XGB, model_RF, model_ANN = self.load_model()
        X_test_ANN = Variable(torch.from_numpy(self.X_test.astype(np.float32)).to(self.device), requires_grad=True)
        y_pred_ANN = model_ANN(X_test_ANN).cpu().data.numpy()
        y_pred_ANN = y_pred_ANN.reshape(y_pred_ANN.shape[0], )

        # When use_sigmoid is set, the raw tree-model outputs are squashed through
        # a sigmoid so that the predicted Rf stays within (0, 1).
        y_pred_XGB = model_XGB.predict(self.X_test)
        if self.use_sigmoid:
            y_pred_XGB = 1 / (1 + np.exp(-y_pred_XGB))
        y_pred_LGB = model_LGB.predict(self.X_test)
        if self.use_sigmoid:
            y_pred_LGB = 1 / (1 + np.exp(-y_pred_LGB))
        y_pred_RF = model_RF.predict(self.X_test)
        if self.use_sigmoid:
            y_pred_RF = 1 / (1 + np.exp(-y_pred_RF))

        # Fixed-weight ensemble of the four models.
        y_pred = 0.2 * y_pred_LGB + 0.2 * y_pred_XGB + 0.2 * y_pred_RF + 0.4 * y_pred_ANN
        return y_pred
def get_descriptor(smiles, ratio):
    # Six molecular descriptors of one solvent, scaled by the given ratio
    # (the solvent's fraction in the eluent).
    compound_mol = Chem.MolFromSmiles(smiles)
    descriptor = []
    descriptor.append(Descriptors.ExactMolWt(compound_mol))
    descriptor.append(Chem.rdMolDescriptors.CalcTPSA(compound_mol))
    descriptor.append(Descriptors.NumRotatableBonds(compound_mol))  # number of rotatable bonds
    descriptor.append(Descriptors.NumHDonors(compound_mol))  # number of H-bond donors
    descriptor.append(Descriptors.NumHAcceptors(compound_mol))  # number of H-bond acceptors
    descriptor.append(Descriptors.MolLogP(compound_mol))  # LogP
    descriptor = np.array(descriptor) * ratio
    return descriptor
def get_eluent_descriptor(eluent):
    # Composition-weighted sum of the solvent descriptors over the five eluent components.
    eluent = np.array(eluent)
    des = np.zeros([6, ])
    for i in range(eluent.shape[0]):
        if eluent[i] != 0:
            des += get_descriptor(Eluent_smiles[i], eluent[i])
    return des
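# Illustrative example (comment only, not executed): a PE:EA = 20:1 eluent is
# normalized to roughly [0.952381, 0.047619, 0, 0, 0], so get_eluent_descriptor
# returns 0.952381 * descriptors('CCCCCC') + 0.047619 * descriptors('CC(OCC)=O'),
# i.e. a composition-weighted sum of the six descriptors of each solvent.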
def get_data_from_smile(smile, eluent_list):
    '''
    Build the 179-dimensional feature row for one compound:
    [0:167]   MACCS fingerprint keys
    [167:173] composition-weighted eluent descriptors
    [173:179] molecular descriptors (MW, TPSA, nRotB, HBD, HBA, LogP)
    '''
    compound_mol = Chem.MolFromSmiles(smile)
    Finger = MACCSkeys.GenMACCSKeys(compound_mol)
    compound_finger = np.array([x for x in Finger])
    compound_MolWt = Descriptors.ExactMolWt(compound_mol)
    compound_TPSA = Chem.rdMolDescriptors.CalcTPSA(compound_mol)
    compound_nRotB = Descriptors.NumRotatableBonds(compound_mol)  # number of rotatable bonds
    compound_HBD = Descriptors.NumHDonors(compound_mol)  # number of H-bond donors
    compound_HBA = Descriptors.NumHAcceptors(compound_mol)  # number of H-bond acceptors
    compound_LogP = Descriptors.MolLogP(compound_mol)  # LogP
    X_test = np.zeros([1, 179])
    X_test[0, 0:167] = compound_finger
    X_test[0, 167:173] = get_eluent_descriptor(eluent_list)
    X_test[0, 173:179] = [compound_MolWt, compound_TPSA, compound_nRotB, compound_HBD, compound_HBA, compound_LogP]
    return X_test
def get_data_from_xlsx(file_name):
    # Despite the function name, the batch file is read as CSV (see the Gradio example file TLC_1.csv).
    file_open = pd.read_csv(file_name)
    smiles = file_open['SMILES'].values
    PEs = file_open['PE'].values
    EAs = file_open['EA'].values
    DCMs = file_open['DCM'].values
    MeOHs = file_open['MeOH'].values
    Et2Os = file_open['Et2O'].values
    X_test = np.zeros([len(smiles), 179])
    for i in range(len(smiles)):
        # Normalize the solvent ratio of each row, then reuse the single-compound featurizer.
        eluent_sum = PEs[i] + EAs[i] + DCMs[i] + MeOHs[i] + Et2Os[i]
        if eluent_sum != 0:
            eluent_list = [PEs[i] / eluent_sum, EAs[i] / eluent_sum, DCMs[i] / eluent_sum,
                           MeOHs[i] / eluent_sum, Et2Os[i] / eluent_sum]
        else:
            eluent_list = [0, 0, 0, 0, 0]
        X_test[i, :] = get_data_from_smile(smiles[i], eluent_list)[0]
    return X_test
def predict_single(smile, PE, EA, DCM, MeOH, Et20):
    # Empty number boxes arrive as None; treat them as 0.
    PE, EA, DCM, MeOH, Et20 = [0 if v is None else v for v in (PE, EA, DCM, MeOH, Et20)]
    config = parse_args()
    config.add_dipole = False
    # Normalize the solvent ratio to fractions summing to 1.
    eluent_sum = PE + EA + DCM + MeOH + Et20
    if eluent_sum != 0:
        eluent_list = [PE / eluent_sum, EA / eluent_sum, DCM / eluent_sum, MeOH / eluent_sum, Et20 / eluent_sum]
    else:
        eluent_list = [0, 0, 0, 0, 0]
    X_test = get_data_from_smile(smile, eluent_list)
    Model = Model_ML(config, X_test)
    Rf = Model.get_Rf()
    return Rf[0]
def predict_xlsx(file):
    file_name = file.name
    config = parse_args()
    config.add_dipole = False
    X_test = get_data_from_xlsx(file_name)
    Model = Model_ML(config, X_test)
    Rf = Model.get_Rf()
    file_open = pd.read_csv(file_name)
    file_open['Rf'] = Rf
    file_open.to_csv(file_name, index=False)  # write the predictions back without adding an index column
    return file_name
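# Batch file layout, as read by get_data_from_xlsx: a .csv with the columns
# SMILES, PE, EA, DCM, MeOH, Et2O, one row per prediction. A hypothetical row:
#   CC(OCC)=O,20,1,0,0,0
# The predictions are written back to the same file as a new 'Rf' column.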
def get_data_from_smile_compare(smile):
    # Eluent compositions (PE, EA, DCM, MeOH, Et2O fractions) for three gradient series.
    x_PE = np.array([[0, 1, 0, 0, 0], [0.333333, 0.666667, 0, 0, 0], [0.5, 0.5, 0, 0, 0],
                     [0.75, 0.25, 0, 0, 0], [0.833333, 0.166667, 0, 0, 0], [0.952381, 0.047619, 0, 0, 0],
                     [0.980392, 0.019608, 0, 0, 0], [1, 0, 0, 0, 0]], dtype=np.float32)
    x_PE = np.flip(x_PE, axis=0)
    x_ME = np.array([[0, 0, 1, 0, 0], [0, 0, 0.990099, 0.009901, 0], [0, 0, 0.980392, 0.019608, 0],
                     [0, 0, 0.967742, 0.032258, 0], [0, 0, 0.952381, 0.047619, 0],
                     [0, 0, 0.909091, 0.090909, 0]], dtype=np.float32)
    x_Et = np.array([[1, 0, 0, 0, 0], [0.66667, 0, 0, 0, 0.33333], [0.5, 0, 0, 0, 0.5],
                     [0.33333, 0, 0, 0, 0.66667], [0, 0, 0, 0, 1]])

    # Build the base feature row once (zero eluent), then swap in each eluent descriptor block.
    X_test = get_data_from_smile(smile, [0, 0, 0, 0, 0])
    X_test_PE, X_test_ME, X_test_Et = [], [], []
    for x in x_PE:
        X_test[0, 167:173] = get_eluent_descriptor(x)
        X_test_PE.append(X_test.copy())
    for x in x_ME:
        X_test[0, 167:173] = get_eluent_descriptor(x)
        X_test_ME.append(X_test.copy())
    for x in x_Et:
        X_test[0, 167:173] = get_eluent_descriptor(x)
        X_test_Et.append(X_test.copy())
    X_test_PE = np.squeeze(np.array(X_test_PE))
    X_test_Et = np.squeeze(np.array(X_test_Et))
    X_test_ME = np.squeeze(np.array(X_test_ME))
    return X_test_PE, X_test_Et, X_test_ME
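# The three feature batches returned above sweep the eluent composition and match
# the x-axis tick labels used in plot_Rf: PE:EA from 1:0 to 0:1, DCM:MeOH from
# 1:0 to 10:1, and PE:Et2O from 1:0 to 0:1.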
def convert_fig_PIL(fig):
    # Render the matplotlib figure with the Agg backend and convert it to a PIL image.
    canvas = FigureCanvasAgg(fig)
    canvas.draw()
    w, h = canvas.get_width_height()
    buf = np.frombuffer(canvas.tostring_argb(), dtype=np.uint8).reshape(h, w, 4)
    buf = np.roll(buf, 3, axis=2)  # ARGB -> RGBA
    image = Image.frombytes("RGBA", (w, h), buf.tobytes())
    return image
def predict_compare(smile_1, smile_2):
    config = parse_args()
    config.add_dipole = False
    X_test_PE_1, X_test_Et_1, X_test_ME_1 = get_data_from_smile_compare(smile_1)
    X_test_PE_2, X_test_Et_2, X_test_ME_2 = get_data_from_smile_compare(smile_2)
    Rf_all = []
    for x_test in [X_test_PE_1, X_test_Et_1, X_test_ME_1, X_test_PE_2, X_test_Et_2, X_test_ME_2]:
        Model = Model_ML(config, x_test)
        Rf = Model.get_Rf()
        Rf_all.append(Rf)
    fig1 = plot_Rf(Rf_all[0], Rf_all[3], 'PE:EA')
    fig2 = plot_Rf(Rf_all[2], Rf_all[5], 'DCM:MeOH')
    fig3 = plot_Rf(Rf_all[1], Rf_all[4], 'PE:Et2O')
    fig1 = convert_fig_PIL(fig1)
    fig2 = convert_fig_PIL(fig2)
    fig3 = convert_fig_PIL(fig3)
    return fig1, fig2, fig3
def plot_Rf(Rf_1, Rf_2, eluent):
    # x-axis tick labels (solvent ratios) for each eluent system; the figure number
    # keeps the three comparison plots in separate matplotlib figures.
    systems = {
        'PE:EA': (1, ['1:0', '50:1', '20:1', '5:1', '3:1', '1:1', '1:2', '0:1']),
        'DCM:MeOH': (2, ['1:0', '100:1', '50:1', '30:1', '20:1', '10:1']),
        'PE:Et2O': (3, ['1:0', '2:1', '1:1', '1:2', '0:1']),
    }
    fig_num, labels = systems[eluent]
    x = np.arange(len(labels))
    font1 = {'family': 'Arial', 'weight': 'normal', 'size': 5}
    fig = plt.figure(fig_num, figsize=(2, 2), dpi=300)
    plt.clf()
    plt.subplot(1, 1, 1)
    plt.plot(x, Rf_1, c='#82B0D2', label='SMILE_1', zorder=1)
    plt.plot(x, Rf_2, c='#8A83B4', label='SMILE_2', zorder=1)
    plt.scatter(x, Rf_1, color='white', edgecolors='black', marker='^', s=10, zorder=1, linewidths=0.5)
    plt.scatter(x, Rf_2, color='white', edgecolors='black', marker='*', s=10, zorder=2, linewidths=0.5)
    plt.xlabel(eluent, font1)
    plt.ylabel('Rf', font1)
    plt.xticks(x, labels, fontproperties='Arial', size=4)
    plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0], [0, 0.2, 0.4, 0.6, 0.8, 1.0], fontproperties='Arial', size=4)
    plt.legend(loc='lower right', prop=font1)
    plt.title(eluent, font1)
    plt.tight_layout()
    plt.ylim(-0.1, 1.1)
    return fig
if __name__ == '__main__':
    theme = gr.themes.Monochrome(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate",
    )
    model_card = """
## Description\n
This is an app for predicting the Rf values of a compound under given eluents in TLC.\n
Input: the SMILES of one compound, such as CC(OCC)=O, and the ratio of the five solvents, e.g. 20 1 0 0 0 for PE:EA = 20:1.\n
Output: the predicted Rf value.\n\n
## Citation\n
If you use our software, we would appreciate credit in the acknowledgements section of your paper, e.g.:\n
"We used the Rf prediction software in our synthesis work. [Citation 1, Citation 2]"\n
Citation 1: H. Xu, J. Lin, Q. Liu, Y. Chen, J. Zhang, Y. Yang, M.C. Young, Y. Xu, D. Zhang, F. Mo.
High-throughput discovery of chemical structure-polarity relationships combining automation and machine-learning techniques.
Chem (2022), 3202–3214, 10.1016/j.chempr.2022.08.008\n
Citation 2: https://huggingface.co/spaces/woshixuhao/Rf_prediction\n
Business applications require authorization!\n
## Function\n
Single prediction: predict the Rf of one compound under a given eluent system.\n
Batch prediction: upload a .csv file with multiple conditions for batch prediction.\n
Rf compare: predict the Rf values of two compounds under different eluents in TLC.
"""
    with gr.Blocks(theme=theme) as demo:
        gr.Markdown('''
<div>
<h1 style='text-align: center'>Rf prediction</h1>
</div>
''')
        gr.Markdown(model_card)
        with gr.Tab("Single prediction"):
            gr.Interface(fn=predict_single, inputs=["text", "number", "number", "number", "number", "number"],
                         outputs='number')
        with gr.Tab("Batch prediction"):
            gr.Interface(fn=predict_xlsx, description='Please upload a .csv file formatted like the example.',
                         inputs="file", outputs="file",
                         examples=[os.path.join(os.path.dirname(__file__), "TLC_1.csv")], cache_examples=True)
        with gr.Tab("Rf compare"):
            gr.Interface(fn=predict_compare, inputs=["text", "text"], outputs=["image", "image", "image"],
                         description='Input: the SMILES of two compounds, such as CC(OCC)=O and CCOCC.\n'
                                     'Output: three images showing the Rf curves at different eluent ratios '
                                     'for the PE/EA, DCM/MeOH and PE/Et2O systems.\n\n')
        demo.launch()

    # smile = 'O=C(OC1C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)O1)C'
    # eluent = [0, 0.9, 0, 0, 0]
    # print(predict_single(smile, 1, 0, 0, 0, 0))
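    # Illustrative local usage (sketch; assumes model_LGB.pkl, model_XGB.pkl,
    # model_RF.pkl and model_ANN.pkl are present in the working directory):
    #   print(predict_single('CC(OCC)=O', 20, 1, 0, 0, 0))   # Rf for PE:EA = 20:1
    #   predict_compare('CC(OCC)=O', 'CCOCC')                # three comparison figures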