woshixuhao committed on
Commit
19beb37
·
1 Parent(s): a56f24f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -6
app.py CHANGED
@@ -20,6 +20,8 @@ import wget
20
  import warnings
21
  import gradio as gr
22
  import pandas as pd
 
 
23
  warnings.filterwarnings("ignore")
24
 
25
  Eluent_smiles=['CCCCCC','CC(OCC)=O','C(Cl)Cl','CO','CCOCC']
@@ -312,6 +314,133 @@ def predict_xlsx(file):
312
  file_open.to_csv(file_name)
313
  return file_name
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  if __name__=='__main__':
316
 
317
 
@@ -327,15 +456,16 @@ if __name__=='__main__':
327
  input: smiles of one compound, such as CC(OCC)=O, and the ratio of five solvents, example: 20 1 0 0 0 for PE:EA=20:1\n
328
  output: the predicted Rf value.\n\n
329
  ## Citation\n
330
- We would like appreciate if you use our software and give us credit in acknowledgement section of your paper
331
- We used Rf prediction software in our synthesis work.[Citation1, Citation2]\n
332
  Citation1: H. Xu, J. Lin, Q. Liu, Y. Chen, J. Zhang, Y. Yang, M.C. Young, Y. Xu, D. Zhang, F. Mo
333
  High-throughput discovery of chemical structure-polarity relationships combining automation and machine-learning techniques
334
  Chem (2022), 3202–3214, 10.1016/j.chempr.2022.08.008\n
335
  Citation2: https://huggingface.co/spaces/woshixuhao/Rf_compare
336
  ## Function\n
337
- Single predict: predict a compound under a given eluent system
338
- CSV predict: Upload a .csv file with multiple conditions to conduct batch prediction
 
339
  """
340
 
341
 
@@ -348,9 +478,11 @@ if __name__=='__main__':
348
  gr.Markdown(model_card)
349
  with gr.Tab("Single prediction"):
350
  gr.Interface(fn=predict_single, inputs=["text", "number","number","number","number","number"], outputs='number')
351
- with gr.Tab("CSV prediction"):
352
  gr.Interface(fn=predict_xlsx,description='please upload a .csv file formatted in the form of the example', inputs="file", outputs="file",examples=[os.path.join(os.path.dirname(__file__),"TLC_1.csv")],cache_examples=True)
353
-
 
 
354
  demo.launch()
355
  # smile='O=C(OC1C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)O1)C'
356
  # eluent=[0,0.9,0,0,0]
 
20
  import warnings
21
  import gradio as gr
22
  import pandas as pd
23
+ from matplotlib.backends.backend_agg import FigureCanvasAgg
24
+ import PIL.Image as Image
25
  warnings.filterwarnings("ignore")
26
 
27
  Eluent_smiles=['CCCCCC','CC(OCC)=O','C(Cl)Cl','CO','CCOCC']
 
314
  file_open.to_csv(file_name)
315
  return file_name
316
 
317
def get_data_from_smile(smile):
    """Build the model input matrices for one compound over three eluent series.

    Each row is a 179-value feature vector: the MACCS fingerprint in columns
    0-166, the eluent descriptor returned by ``get_eluent_descriptor`` in
    columns 167-172, and six compound descriptors (exact molecular weight,
    TPSA, rotatable bonds, H-bond donors, H-bond acceptors, LogP) in columns
    173-178.

    Args:
        smile: SMILES string of the compound.

    Returns:
        A tuple ``(X_test_PE, X_test_Et, X_test_ME)`` holding the feature rows
        for the PE/EA, PE/Et2O and DCM/MeOH series, in that order.

    Raises:
        ValueError: if RDKit cannot parse ``smile``.
    """
    # Solvent-fraction vectors, one row per eluent mixture.  The five columns
    # presumably follow the order of the module-level Eluent_smiles list
    # (PE, EA, DCM, MeOH, Et2O) -- verify against get_eluent_descriptor.
    # PE/EA series, listed from pure PE to pure EA.
    x_PE = np.array([[1, 0, 0, 0, 0], [0.980392, 0.019608, 0, 0, 0],
                     [0.952381, 0.047619, 0, 0, 0], [0.833333, 0.166667, 0, 0, 0],
                     [0.75, 0.25, 0, 0, 0], [0.5, 0.5, 0, 0, 0],
                     [0.333333, 0.666667, 0, 0, 0], [0, 1, 0, 0, 0]], dtype=np.float32)
    x_ME = np.array([[0, 0, 1, 0, 0], [0, 0, 0.990099, 0.009901, 0], [0, 0, 0.980392, 0.019608, 0],
                     [0, 0, 0.967742, 0.032258, 0], [0, 0, 0.952381, 0.047619, 0],
                     [0, 0, 0.909091, 0.090909, 0]], dtype=np.float32)
    # No explicit dtype here (NumPy default float64), unlike the two tables above.
    x_Et = np.array([[1, 0, 0, 0, 0], [0.66667, 0, 0, 0, 0.33333], [0.5, 0, 0, 0, 0.5],
                     [0.33333, 0, 0, 0, 0.66667], [0, 0, 0, 0, 1]])

    # Parse once and fail early with a readable message: MolFromSmiles returns
    # None for an unparsable string, which would otherwise surface as an
    # obscure error inside the descriptor calls below.
    compound_mol = Chem.MolFromSmiles(smile)
    if compound_mol is None:
        raise ValueError(f"Could not parse SMILES string: {smile!r}")

    compound_finger = np.array(list(MACCSkeys.GenMACCSKeys(compound_mol)))
    compound_MolWt = Descriptors.ExactMolWt(compound_mol)
    compound_TPSA = Chem.rdMolDescriptors.CalcTPSA(compound_mol)
    compound_nRotB = Descriptors.NumRotatableBonds(compound_mol)  # Number of rotatable bonds
    compound_HBD = Descriptors.NumHDonors(compound_mol)  # Number of H bond donors
    compound_HBA = Descriptors.NumHAcceptors(compound_mol)  # Number of H bond acceptors
    compound_LogP = Descriptors.MolLogP(compound_mol)  # LogP

    # Template row: compound features are fixed, the eluent slot (167:173)
    # is overwritten for every mixture.
    X_test = np.zeros([1, 179])
    X_test[0, 0:167] = compound_finger
    X_test[0, 173:179] = [compound_MolWt, compound_TPSA, compound_nRotB,
                          compound_HBD, compound_HBA, compound_LogP]

    def _rows_for(ratios):
        # One feature row per eluent mixture, stacked into an (n, 179) array.
        rows = []
        for ratio in ratios:
            X_test[0, 167:173] = get_eluent_descriptor(ratio)
            rows.append(X_test.copy())
        return np.squeeze(np.array(rows))

    X_test_PE = _rows_for(x_PE)
    X_test_ME = _rows_for(x_ME)
    X_test_Et = _rows_for(x_Et)
    return X_test_PE, X_test_Et, X_test_ME
360
+
361
def convert_fig_PIL(fig):
    """Render a Matplotlib figure and return it as an RGBA PIL image.

    Args:
        fig: the Matplotlib figure to rasterise.

    Returns:
        A ``PIL.Image`` in mode ``"RGBA"`` with the canvas' pixel size.
    """
    canvas = FigureCanvasAgg(fig)
    canvas.draw()
    w, h = canvas.get_width_height()
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # Image buffers are row-major, so the array is (rows=h, cols=w, channels).
    buf = np.frombuffer(canvas.tostring_argb(), dtype=np.uint8).reshape(h, w, 4)
    # The canvas yields ARGB; rotate the channel axis so it becomes RGBA.
    buf = np.roll(buf, 3, axis=2)
    # tobytes() replaces the deprecated ndarray.tostring().
    image = Image.frombytes("RGBA", (w, h), buf.tobytes())
    return image
370
+
371
def predict_compare(smile_1,smile_2):
    """Predict and plot the Rf curves of two compounds in three eluent systems.

    Args:
        smile_1: SMILES string of the first compound.
        smile_2: SMILES string of the second compound.

    Returns:
        Three PIL images, in the order PE:EA, DCM:MeOH, PE:Et2O, each showing
        both compounds' predicted Rf values across that eluent series.
    """
    config = parse_args()
    config.add_dipole = False

    # get_data_from_smile returns its matrices in the order (PE, Et, ME).
    inputs_1 = get_data_from_smile(smile_1)
    inputs_2 = get_data_from_smile(smile_2)

    # Indices 0-2 belong to compound 1 and 3-5 to compound 2.
    predictions = []
    for features in (*inputs_1, *inputs_2):
        predictions.append(Model_ML(config, features).get_Rf())

    pe_1, et_1, me_1, pe_2, et_2, me_2 = predictions

    # Draw all three figures first, then rasterise them.
    figures = [
        plot_Rf(pe_1, pe_2, 'PE:EA'),
        plot_Rf(me_1, me_2, 'DCM:MeOH'),
        plot_Rf(et_1, et_2, 'PE:Et2O'),
    ]
    return tuple(convert_fig_PIL(figure) for figure in figures)
388
+
389
+
390
+
391
def plot_Rf(Rf_1,Rf_2,eluent):
    """Plot the Rf values of two compounds across one eluent series.

    Args:
        Rf_1: Rf values of the first compound, one per mixture in the series.
        Rf_2: Rf values of the second compound, same length as ``Rf_1``.
        eluent: series name, one of ``'PE:EA'``, ``'DCM:MeOH'``, ``'PE:Et2O'``.

    Returns:
        The Matplotlib figure holding the plot.

    Raises:
        ValueError: if ``eluent`` is not one of the supported series.
    """
    # Series name -> (pyplot figure number, x tick labels).  The number of
    # tick labels fixes how many points each curve is expected to have.
    layouts = {
        'PE:EA': (1, ['1:0','50:1','20:1','5:1','3:1','1:1','1:2','0:1']),
        'DCM:MeOH': (2, ['1:0','100:1','50:1','30:1','20:1','10:1']),
        'PE:Et2O': (3, ['1:0','2:1','1:1','1:2','0:1']),
    }
    # Reject unknown names up front; otherwise no figure would be created and
    # the function would fail later on an unbound local.
    if eluent not in layouts:
        raise ValueError(f"Unsupported eluent system: {eluent!r}")
    fig_num, tick_labels = layouts[eluent]

    font1 = {'family': 'Arial',
             'weight': 'normal',
             'size': 5}
    positions = np.arange(0, len(tick_labels), 1)
    y_ticks = [0, 0.2, 0.4, 0.6, 0.8, 1.0]

    # Each series keeps its own numbered figure, cleared before redrawing.
    fig = plt.figure(fig_num, figsize=(2, 2), dpi=300)
    plt.clf()
    plt.subplot(1, 1, 1)
    plt.plot(positions, Rf_1, c='#82B0D2', label='SMILE_1', zorder=1)
    plt.plot(positions, Rf_2, c='#8A83B4', label='SMILE_2', zorder=1)
    plt.scatter(positions, Rf_1, color='white', edgecolors='black', marker='^', s=10, zorder=1, linewidths=0.5)
    plt.scatter(positions, Rf_2, color='white', edgecolors='black', marker='*', s=10, zorder=2, linewidths=0.5)
    plt.xlabel(eluent, font1)
    plt.ylabel('Rf', font1)
    plt.xticks(positions, tick_labels, fontproperties='Arial', size=4)
    plt.yticks(y_ticks, y_ticks, fontproperties='Arial', size=4)
    plt.legend(loc='lower right', prop=font1)
    plt.title(eluent, font1)
    plt.tight_layout()
    plt.ylim(-0.1, 1.1)
    return fig
443
+
444
  if __name__=='__main__':
445
 
446
 
 
456
  input: smiles of one compound, such as CC(OCC)=O, and the ratio of five solvents, example: 20 1 0 0 0 for PE:EA=20:1\n
457
  output: the predicted Rf value.\n\n
458
  ## Citation\n
459
+ We would appreciate it if you use our software and give us credit in the acknowledgements section of your paper:\n
460
+ we use RF prediction software in our synthesis work. [Citation 1, Citation 2]
461
  Citation1: H. Xu, J. Lin, Q. Liu, Y. Chen, J. Zhang, Y. Yang, M.C. Young, Y. Xu, D. Zhang, F. Mo
462
  High-throughput discovery of chemical structure-polarity relationships combining automation and machine-learning techniques
463
  Chem (2022), 3202–3214, 10.1016/j.chempr.2022.08.008\n
464
  Citation2: https://huggingface.co/spaces/woshixuhao/Rf_compare
465
  ## Function\n
466
+ Single predict: predict a compound under a given eluent system\n
467
+ Batch predict: Upload a .csv file with multiple conditions to conduct batch prediction\n
468
+ Rf compare: predict Rf values of two compounds under different eluents in TLC
469
  """
470
 
471
 
 
478
  gr.Markdown(model_card)
479
  with gr.Tab("Single prediction"):
480
  gr.Interface(fn=predict_single, inputs=["text", "number","number","number","number","number"], outputs='number')
481
+ with gr.Tab("Batch prediction"):
482
  gr.Interface(fn=predict_xlsx,description='please upload a .csv file formatted in the form of the example', inputs="file", outputs="file",examples=[os.path.join(os.path.dirname(__file__),"TLC_1.csv")],cache_examples=True)
483
# Tab that compares two compounds; the `with` header needs its trailing colon.
with gr.Tab("Rf compare"):
    gr.Interface(fn=predict_compare, inputs=["text", "text"], outputs=["image","image","image"],
                 description='input: smiles of two compounds, such as CC(OCC)=O and CC(C)CCC(NC(C)C)=O\n output: three images that show the Rf curve with different eluent ratios under PE/EA, DCM/MeOH, PE/Et2O system.\n\n')
486
  demo.launch()
487
  # smile='O=C(OC1C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)O1)C'
488
  # eluent=[0,0.9,0,0,0]