Spaces:
Sleeping
Sleeping
Commit
·
19beb37
1
Parent(s):
a56f24f
Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,8 @@ import wget
|
|
20 |
import warnings
|
21 |
import gradio as gr
|
22 |
import pandas as pd
|
|
|
|
|
23 |
warnings.filterwarnings("ignore")
|
24 |
|
25 |
Eluent_smiles=['CCCCCC','CC(OCC)=O','C(Cl)Cl','CO','CCOCC']
|
@@ -312,6 +314,133 @@ def predict_xlsx(file):
|
|
312 |
file_open.to_csv(file_name)
|
313 |
return file_name
|
314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
if __name__=='__main__':
|
316 |
|
317 |
|
@@ -327,15 +456,16 @@ if __name__=='__main__':
|
|
327 |
input: smiles of one compound, such as CC(OCC)=O, and the ratio of five solvents, example: 20 1 0 0 0 for PE:EA=20:1\n
|
328 |
output: the predicted Rf value.\n\n
|
329 |
## Citation\n
|
330 |
-
We would
|
331 |
-
|
332 |
Citation1: H. Xu, J. Lin, Q. Liu, Y. Chen, J. Zhang, Y. Yang, M.C. Young, Y. Xu, D. Zhang, F. Mo
|
333 |
High-throughput discovery of chemical structure-polarity relationships combining automation and machine-learning techniques
|
334 |
Chem (2022), 3202–3214, 10.1016/j.chempr.2022.08.008\n
|
335 |
Citation2: https://huggingface.co/spaces/woshixuhao/Rf_compare
|
336 |
## Function\n
|
337 |
-
Single predict: predict a compound under a given eluent system
|
338 |
-
|
|
|
339 |
"""
|
340 |
|
341 |
|
@@ -348,9 +478,11 @@ if __name__=='__main__':
|
|
348 |
gr.Markdown(model_card)
|
349 |
with gr.Tab("Single prediction"):
|
350 |
gr.Interface(fn=predict_single, inputs=["text", "number","number","number","number","number"], outputs='number')
|
351 |
-
with gr.Tab("
|
352 |
gr.Interface(fn=predict_xlsx,description='please upload a .csv file formatted in the form of the example', inputs="file", outputs="file",examples=[os.path.join(os.path.dirname(__file__),"TLC_1.csv")],cache_examples=True)
|
353 |
-
|
|
|
|
|
354 |
demo.launch()
|
355 |
# smile='O=C(OC1C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)O1)C'
|
356 |
# eluent=[0,0.9,0,0,0]
|
|
|
20 |
import warnings
|
21 |
import gradio as gr
|
22 |
import pandas as pd
|
23 |
+
from matplotlib.backends.backend_agg import FigureCanvasAgg
|
24 |
+
import PIL.Image as Image
|
25 |
warnings.filterwarnings("ignore")
|
26 |
|
27 |
Eluent_smiles=['CCCCCC','CC(OCC)=O','C(Cl)Cl','CO','CCOCC']
|
|
|
314 |
file_open.to_csv(file_name)
|
315 |
return file_name
|
316 |
|
317 |
+
def get_data_from_smile(smile):
    """Build model input matrices for one compound over three eluent series.

    Each output row is a 179-element feature vector laid out as:
      * columns 0-166   : MACCS fingerprint bits of the compound
      * columns 167-172 : values returned by ``get_eluent_descriptor`` for
                          one eluent ratio vector
      * columns 173-178 : exact molecular weight, TPSA, rotatable-bond count,
                          H-bond donors, H-bond acceptors, LogP

    Args:
        smile: SMILES string of the compound.

    Returns:
        Tuple ``(X_test_PE, X_test_Et, X_test_ME)`` of arrays with shapes
        (8, 179), (5, 179) and (6, 179). Note the order: the Et series comes
        before the ME series.

    Raises:
        ValueError: if RDKit cannot parse ``smile``.
    """
    # Five-component eluent ratio vectors, one row per mixture.
    # NOTE(review): columns presumably follow the module-level Eluent_smiles
    # order (hexane, ethyl acetate, DCM, MeOH, diethyl ether) - confirm.
    x_PE = np.array([[0, 1, 0, 0, 0], [0.333333, 0.666667, 0, 0, 0], [0.5, 0.5, 0, 0, 0],
                     [0.75, 0.25, 0, 0, 0], [0.833333, 0.166667, 0, 0, 0], [0.952381, 0.047619, 0, 0, 0],
                     [0.980392, 0.019608, 0, 0, 0], [1, 0, 0, 0, 0]], dtype=np.float32)
    # Reverse so the series starts at [1, 0, 0, 0, 0] and ends at [0, 1, 0, 0, 0].
    x_PE = np.flip(x_PE, axis=0)
    x_ME = np.array([[0, 0, 1, 0, 0], [0, 0, 0.990099, 0.009901, 0], [0, 0, 0.980392, 0.019608, 0],
                     [0, 0, 0.967742, 0.032258, 0], [0, 0, 0.952381, 0.047619, 0],
                     [0, 0, 0.909091, 0.090909, 0]], dtype=np.float32)
    # Unlike the two arrays above, this one keeps NumPy's default float dtype.
    x_Et = np.array([[1, 0, 0, 0, 0], [0.66667, 0, 0, 0, 0.33333], [0.5, 0, 0, 0, 0.5],
                     [0.33333, 0, 0, 0, 0.66667], [0, 0, 0, 0, 1]])

    # Parse once and fail early: MolFromSmiles returns None on invalid input,
    # which would otherwise surface as an opaque error inside RDKit.
    compound_mol = Chem.MolFromSmiles(smile)
    if compound_mol is None:
        raise ValueError(f"Invalid SMILES string: {smile!r}")

    compound_finger = np.array(list(MACCSkeys.GenMACCSKeys(compound_mol)))
    compound_MolWt = Descriptors.ExactMolWt(compound_mol)
    compound_TPSA = Chem.rdMolDescriptors.CalcTPSA(compound_mol)
    compound_nRotB = Descriptors.NumRotatableBonds(compound_mol)  # Number of rotatable bonds
    compound_HBD = Descriptors.NumHDonors(compound_mol)  # Number of H bond donors
    compound_HBA = Descriptors.NumHAcceptors(compound_mol)  # Number of H bond acceptors
    compound_LogP = Descriptors.MolLogP(compound_mol)  # LogP

    # Compound-only part of the feature row; the eluent slice (167:173) is
    # filled in per mixture below.
    X_test = np.zeros([1, 179])
    X_test[0, 0:167] = compound_finger
    X_test[0, 173:179] = [compound_MolWt, compound_TPSA, compound_nRotB,
                          compound_HBD, compound_HBA, compound_LogP]

    def _rows_for(ratios):
        # One feature row per eluent ratio, stacked to (n_ratios, 179).
        rows = []
        for ratio in ratios:
            X_test[0, 167:173] = get_eluent_descriptor(ratio)
            rows.append(X_test.copy())
        return np.squeeze(np.array(rows))

    X_test_PE = _rows_for(x_PE)
    X_test_ME = _rows_for(x_ME)
    X_test_Et = _rows_for(x_Et)
    return X_test_PE, X_test_Et, X_test_ME
|
360 |
+
|
361 |
+
def convert_fig_PIL(fig):
    """Render a Matplotlib figure with the Agg backend and return a PIL image.

    Args:
        fig: Matplotlib figure to rasterise.

    Returns:
        A ``PIL.Image`` in "RGBA" mode with the canvas' pixel dimensions.
    """
    canvas = FigureCanvasAgg(fig)
    canvas.draw()
    w, h = canvas.get_width_height()
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # The pixel buffer is row-major, i.e. (height, width, channel).
    argb = np.frombuffer(canvas.tostring_argb(), dtype=np.uint8).reshape(h, w, 4)
    # Rotate the channel axis so ARGB becomes RGBA (alpha moves to the end).
    rgba = np.roll(argb, 3, axis=2)
    # ndarray.tostring() was removed in NumPy 2.0; tobytes() is the replacement.
    return Image.frombytes("RGBA", (w, h), rgba.tobytes())
|
370 |
+
|
371 |
+
def predict_compare(smile_1,smile_2):
    """Predict and plot Rf curves of two compounds in three eluent systems.

    Builds the feature matrices for both SMILES strings, runs
    ``Model_ML(config, features).get_Rf()`` on each of the six matrices, and
    draws one comparison plot per eluent system.

    Args:
        smile_1: SMILES string of the first compound.
        smile_2: SMILES string of the second compound.

    Returns:
        Tuple of three PIL images, in the order PE:EA, DCM:MeOH, PE:Et2O.
    """
    config = parse_args()
    config.add_dipole = False

    # get_data_from_smile returns (PE, Et, ME) - note Et precedes ME.
    pe_1, et_1, me_1 = get_data_from_smile(smile_1)
    pe_2, et_2, me_2 = get_data_from_smile(smile_2)

    # Same evaluation order as the feature matrices are listed here.
    feature_sets = (pe_1, et_1, me_1, pe_2, et_2, me_2)
    rf_pe_1, rf_et_1, rf_me_1, rf_pe_2, rf_et_2, rf_me_2 = [
        Model_ML(config, features).get_Rf() for features in feature_sets
    ]

    # Draw all three figures first, then rasterise them.
    figures = [
        plot_Rf(rf_pe_1, rf_pe_2, 'PE:EA'),
        plot_Rf(rf_me_1, rf_me_2, 'DCM:MeOH'),
        plot_Rf(rf_et_1, rf_et_2, 'PE:Et2O'),
    ]
    return tuple(convert_fig_PIL(figure) for figure in figures)
|
388 |
+
|
389 |
+
|
390 |
+
|
391 |
+
def plot_Rf(Rf_1,Rf_2,eluent):
    """Plot the Rf curves of two compounds for one eluent system.

    Args:
        Rf_1: Rf values of the first compound, one per eluent ratio.
        Rf_2: Rf values of the second compound, one per eluent ratio.
        eluent: One of 'PE:EA' (8 ratios), 'DCM:MeOH' (6 ratios) or
            'PE:Et2O' (5 ratios). ``Rf_1`` and ``Rf_2`` are plotted against
            that many tick positions.

    Returns:
        The Matplotlib figure the curves were drawn on. Figure numbers 1, 2
        and 3 are reused (and cleared) for the three systems respectively.

    Raises:
        ValueError: if ``eluent`` is not one of the supported systems.
    """
    # eluent name -> (pyplot figure number, x tick labels)
    systems = {
        'PE:EA': (1, ['1:0','50:1','20:1','5:1','3:1','1:1','1:2','0:1']),
        'DCM:MeOH': (2, ['1:0','100:1','50:1','30:1','20:1','10:1']),
        'PE:Et2O': (3, ['1:0','2:1','1:1','1:2','0:1']),
    }
    if eluent not in systems:
        # Previously this path drew on whatever figure was current and then
        # failed with UnboundLocalError on ``fig``.
        raise ValueError(
            f"Unsupported eluent system {eluent!r}; expected one of {list(systems)}"
        )
    figure_number, tick_labels = systems[eluent]

    font1 = {'family': 'Arial',
             'weight': 'normal',
             'size': 5}
    y_ticks = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
    # Ratios are drawn at evenly spaced positions, not at their numeric value.
    positions = np.arange(0, len(tick_labels), 1)

    fig = plt.figure(figure_number, figsize=(2, 2), dpi=300)
    plt.clf()
    plt.subplot(1, 1, 1)
    plt.plot(positions, Rf_1, c='#82B0D2', label='SMILE_1', zorder=1)
    plt.plot(positions, Rf_2, c='#8A83B4', label='SMILE_2', zorder=1)
    plt.scatter(positions, Rf_1, color='white', edgecolors='black', marker='^', s=10, zorder=1, linewidths=0.5)
    plt.scatter(positions, Rf_2, color='white', edgecolors='black', marker='*', s=10, zorder=2, linewidths=0.5)
    plt.xlabel(eluent, font1)
    plt.ylabel('Rf', font1)
    plt.xticks(positions, tick_labels, fontproperties='Arial', size=4)
    plt.yticks(y_ticks, y_ticks, fontproperties='Arial', size=4)
    plt.legend(loc='lower right', prop=font1)
    plt.title(eluent, font1)
    plt.tight_layout()
    plt.ylim(-0.1, 1.1)
    return fig
|
443 |
+
|
444 |
if __name__=='__main__':
|
445 |
|
446 |
|
|
|
456 |
input: smiles of one compound, such as CC(OCC)=O, and the ratio of five solvents, example: 20 1 0 0 0 for PE:EA=20:1\n
|
457 |
output: the predicted Rf value.\n\n
|
458 |
## Citation\n
|
459 |
+
We would appreciate it if you use our software and give us credit in the acknowledgements section of your paper:\n
|
460 |
+
we use RF prediction software in our synthesis work. [Citation 1, Citation 2]
|
461 |
Citation1: H. Xu, J. Lin, Q. Liu, Y. Chen, J. Zhang, Y. Yang, M.C. Young, Y. Xu, D. Zhang, F. Mo
|
462 |
High-throughput discovery of chemical structure-polarity relationships combining automation and machine-learning techniques
|
463 |
Chem (2022), 3202–3214, 10.1016/j.chempr.2022.08.008\n
|
464 |
Citation2: https://huggingface.co/spaces/woshixuhao/Rf_compare
|
465 |
## Function\n
|
466 |
+
Single predict: predict a compound under a given eluent system\n
|
467 |
+
Batch predict: Upload a .csv file with multiple conditions to conduct batch prediction\n
|
468 |
+
Rf compare: predict Rf values of two compounds under different eluents in TLC
|
469 |
"""
|
470 |
|
471 |
|
|
|
478 |
gr.Markdown(model_card)
|
479 |
with gr.Tab("Single prediction"):
|
480 |
gr.Interface(fn=predict_single, inputs=["text", "number","number","number","number","number"], outputs='number')
|
481 |
+
with gr.Tab("Batch prediction"):
|
482 |
gr.Interface(fn=predict_xlsx,description='please upload a .csv file formatted in the form of the example', inputs="file", outputs="file",examples=[os.path.join(os.path.dirname(__file__),"TLC_1.csv")],cache_examples=True)
|
483 |
+
# Third tab: compare the predicted Rf curves of two compounds.
# (The ``with`` header was missing its trailing colon, a SyntaxError.)
with gr.Tab("Rf compare"):
    gr.Interface(fn=predict_compare, inputs=["text", "text"], outputs=["image","image","image"],
                 description='input: smiles of two compounds, such as CC(OCC)=O and CC(C)CCC(NC(C)C)=O\n output: three images that show the Rf curve with different eluent ratios under PE/EA, DCM/MeOH, PE/Et2O system.\n\n')
|
486 |
demo.launch()
|
487 |
# smile='O=C(OC1C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)O1)C'
|
488 |
# eluent=[0,0.9,0,0,0]
|