Spaces:

woshixuhao
/

Rf_prediction

Sleeping

App Files Files

woshixuhao committed on May 17, 2023

Commit

19beb37

1 Parent(s): a56f24f

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -6

app.py CHANGED Viewed

@@ -20,6 +20,8 @@ import wget
 import warnings
 import gradio as gr
 import pandas as pd
 warnings.filterwarnings("ignore")
 Eluent_smiles=['CCCCCC','CC(OCC)=O','C(Cl)Cl','CO','CCOCC']
@@ -312,6 +314,133 @@ def predict_xlsx(file):
     file_open.to_csv(file_name)
     return file_name
 if __name__=='__main__':
@@ -327,15 +456,16 @@ if __name__=='__main__':
     input: smiles of one compound, such as CC(OCC)=O,   and the ratio of five solvents, example: 20 1 0 0 0 for PE:EA=20:1\n
     output: the predicted Rf value.\n\n
     ## Citation\n
-    We would like appreciate if you use our software and give us credit in acknowledgement section of your paper：
-    We used Rf prediction software in our synthesis work.[Citation1, Citation2]\n
     Citation1: H. Xu, J. Lin, Q. Liu, Y. Chen, J. Zhang, Y. Yang, M.C. Young, Y. Xu, D. Zhang, F. Mo
     High-throughput discovery of chemical structure-polarity relationships combining automation and machine-learning techniques
     Chem (2022), 3202–3214, 10.1016/j.chempr.2022.08.008\n
     Citation2: https://huggingface.co/spaces/woshixuhao/Rf_compare
     ## Function\n
-    Single predict: predict a compound under a given eluent system
-    CSV predict: Upload a .csv file with multiple conditions to conduct batch prediction
     """
@@ -348,9 +478,11 @@ if __name__=='__main__':
         gr.Markdown(model_card)
         with gr.Tab("Single prediction"):
             gr.Interface(fn=predict_single, inputs=["text", "number","number","number","number","number"], outputs='number')
-        with gr.Tab("CSV prediction"):
             gr.Interface(fn=predict_xlsx,description='please upload a .csv file formatted in the form of the example', inputs="file", outputs="file",examples=[os.path.join(os.path.dirname(__file__),"TLC_1.csv")],cache_examples=True)
     demo.launch()
     # smile='O=C(OC1C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)O1)C'
     # eluent=[0,0.9,0,0,0]

 import warnings
 import gradio as gr
 import pandas as pd
+from matplotlib.backends.backend_agg import FigureCanvasAgg
+import PIL.Image as Image
 warnings.filterwarnings("ignore")
 Eluent_smiles=['CCCCCC','CC(OCC)=O','C(Cl)Cl','CO','CCOCC']
     file_open.to_csv(file_name)
     return file_name
+def get_data_from_smile(smile):
+    x_PE = np.array([[0, 1, 0, 0, 0], [0.333333, 0.666667, 0, 0, 0], [0.5, 0.5, 0, 0, 0],
+                         [0.75, 0.25, 0, 0, 0], [0.833333, 0.166667, 0, 0, 0], [0.952381, 0.047619, 0, 0, 0],
+                         [0.980392, 0.019608, 0, 0, 0], [1, 0, 0, 0, 0]], dtype=np.float32)
+    x_PE=np.flip(x_PE,axis=0)
+    x_ME = np.array([[0, 0, 1, 0, 0], [0, 0, 0.990099, 0.009901, 0], [0, 0, 0.980392, 0.019608, 0],
+                            [0, 0, 0.967742, 0.032258, 0], [0, 0, 0.952381, 0.047619, 0],
+                            [0, 0, 0.909091, 0.090909, 0]], dtype=np.float32)
+    x_Et = np.array([[1,0,0,0,0],[0.66667, 0, 0, 0, 0.33333], [0.5, 0, 0, 0, 0.5],[0.33333,0,0,0,0.66667], [0, 0, 0, 0, 1]])
+    compound_mol = Chem.MolFromSmiles(smile)
+    Finger = MACCSkeys.GenMACCSKeys(Chem.MolFromSmiles(smile))
+    fingerprint = np.array([x for x in Finger])
+    compound_finger = fingerprint
+    compound_MolWt = Descriptors.ExactMolWt(compound_mol)
+    compound_TPSA = Chem.rdMolDescriptors.CalcTPSA(compound_mol)
+    compound_nRotB = Descriptors.NumRotatableBonds(compound_mol)  # Number of rotable bonds
+    compound_HBD = Descriptors.NumHDonors(compound_mol)  # Number of H bond donors
+    compound_HBA = Descriptors.NumHAcceptors(compound_mol)  # Number of H bond acceptors
+    compound_LogP = Descriptors.MolLogP(compound_mol)  # LogP
+    X_test_PE=[]
+    X_test_ME=[]
+    X_test_Et=[]
+    X_test = np.zeros([1, 179])
+    X_test[0, 0:167] = compound_finger
+    X_test[0, 167:173] = 0
+    X_test[0, 173:179] = [compound_MolWt, compound_TPSA, compound_nRotB, compound_HBD, compound_HBA, compound_LogP]
+    for x in x_PE:
+        X_test[0, 167:173] =get_eluent_descriptor(x)
+        X_test_PE.append(X_test.copy())
+    for x in x_ME:
+        X_test[0, 167:173] = get_eluent_descriptor(x)
+        X_test_ME.append(X_test.copy())
+    for x in x_Et:
+        X_test[0, 167:173] = get_eluent_descriptor(x)
+        X_test_Et.append(X_test.copy())
+    X_test_PE=np.squeeze(np.array(X_test_PE))
+    X_test_Et=np.squeeze(np.array(X_test_Et))
+    X_test_ME=np.squeeze(np.array(X_test_ME))
+    return X_test_PE,X_test_Et,X_test_ME
+def convert_fig_PIL(fig):
+    canvas = FigureCanvasAgg(fig)
+    canvas.draw()
+    w, h = canvas.get_width_height()
+    buf = np.fromstring(canvas.tostring_argb(), dtype=np.uint8)
+    buf.shape = (w, h, 4)
+    buf = np.roll(buf, 3, axis=2)
+    image = Image.frombytes("RGBA", (w, h), buf.tostring())
+    return image
+def predict_compare(smile_1,smile_2):
+    config = parse_args()
+    config.add_dipole = False
+    X_test_PE_1,X_test_Et_1,X_test_ME_1=get_data_from_smile(smile_1)
+    X_test_PE_2,X_test_Et_2,X_test_ME_2=get_data_from_smile(smile_2)
+    Rf_all=[]
+    for x_test in [X_test_PE_1,X_test_Et_1,X_test_ME_1,X_test_PE_2,X_test_Et_2,X_test_ME_2]:
+        Model = Model_ML(config,x_test)
+        Rf=Model.get_Rf()
+        Rf_all.append(Rf)
+    fig1=plot_Rf(Rf_all[0],Rf_all[3],'PE:EA')
+    fig2 = plot_Rf(Rf_all[2], Rf_all[5], 'DCM:MeOH')
+    fig3 = plot_Rf(Rf_all[1], Rf_all[4], 'PE:Et2O')
+    fig1=convert_fig_PIL(fig1)
+    fig2=convert_fig_PIL(fig2)
+    fig3=convert_fig_PIL(fig3)
+    return fig1,fig2,fig3
+def plot_Rf(Rf_1,Rf_2,eluent):
+    EA = np.array([0,  0.019608,  0.047619, 0.166667, 0.25, 0.5, 0.666667, 1])
+    ME = np.array([0, 0.009901, 0.019608, 0.032258, 0.047619, 0.090909])
+    Et = np.array([0, 0.33333, 0.5, 0.66667, 1])
+    font1 = {'family': 'Arial',
+             'weight': 'normal',
+             'size': 5}
+    if eluent=='PE:EA':
+        fig = plt.figure(1, figsize=(2, 2), dpi=300)
+        plt.clf()
+        ax = plt.subplot(1, 1, 1)
+        plt.plot(np.arange(0,EA.shape[0],1), Rf_1, c='#82B0D2', label='SMILE_1', zorder=1)
+        plt.plot(np.arange(0,EA.shape[0],1), Rf_2, c='#8A83B4', label='SMILE_2', zorder=1)
+        plt.scatter(np.arange(0,EA.shape[0],1), Rf_1, color='white', edgecolors='black', marker='^', s=10, zorder=1,linewidths=0.5)
+        plt.scatter(np.arange(0,EA.shape[0],1), Rf_2, color='white', edgecolors='black', marker='*', s=10, zorder=2,linewidths=0.5)
+        plt.xlabel('PE:EA',font1)
+        plt.ylabel('Rf',font1)
+        plt.xticks(np.arange(0,EA.shape[0],1), ['1:0','50:1','20:1','5:1','3:1','1:1','1:2','0:1'],fontproperties='Arial', size=4)
+        plt.yticks([0,0.2,0.4,0.6,0.8,1.0],[0,0.2,0.4,0.6,0.8,1.0],fontproperties='Arial', size=4)
+        plt.legend(loc='lower right', prop=font1)
+    if eluent == 'DCM:MeOH':
+        fig = plt.figure(2, figsize=(2, 2), dpi=300)
+        plt.clf()
+        ax = plt.subplot(1, 1, 1)
+        plt.plot(np.arange(0,ME.shape[0],1), Rf_1, c='#82B0D2', label='SMILE_1', zorder=1)
+        plt.plot(np.arange(0,ME.shape[0],1), Rf_2, c='#8A83B4', label='SMILE_2', zorder=1)
+        plt.scatter(np.arange(0,ME.shape[0],1), Rf_1, color='white', edgecolors='black', marker='^', s=10, zorder=1,linewidths=0.5)
+        plt.scatter(np.arange(0,ME.shape[0],1), Rf_2, color='white', edgecolors='black', marker='*', s=10, zorder=2,linewidths=0.5)
+        plt.xlabel('DCM:MeOH', font1)
+        plt.ylabel('Rf', font1)
+        plt.xticks(np.arange(0,ME.shape[0],1), ['1:0','100:1','50:1','30:1','20:1','10:1'], fontproperties='Arial', size=4)
+        plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0], [0, 0.2, 0.4, 0.6, 0.8, 1.0], fontproperties='Arial', size=4)
+        plt.legend(loc='lower right', prop=font1)
+    if eluent == 'PE:Et2O':
+        fig = plt.figure(3, figsize=(2, 2), dpi=300)
+        plt.clf()
+        ax = plt.subplot(1, 1, 1)
+        plt.plot(np.arange(0,Et.shape[0],1), Rf_1, c='#82B0D2', label='SMILE_1', zorder=1)
+        plt.plot(np.arange(0,Et.shape[0],1), Rf_2, c='#8A83B4', label='SMILE_2', zorder=1)
+        plt.scatter(np.arange(0,Et.shape[0],1), Rf_1, color='white', edgecolors='black', marker='^', s=10, zorder=1,linewidths=0.5)
+        plt.scatter(np.arange(0,Et.shape[0],1), Rf_2, color='white', edgecolors='black', marker='*', s=10, zorder=2,linewidths=0.5)
+        plt.xlabel('PE:Et2O', font1)
+        plt.ylabel('Rf', font1)
+        plt.xticks(np.arange(0,Et.shape[0],1), ['1:0','2:1','1:1','1:2','0:1'], fontproperties='Arial', size=4)
+        plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0], [0, 0.2, 0.4, 0.6, 0.8, 1.0], fontproperties='Arial', size=4)
+        plt.legend(loc='lower right', prop=font1)
+    plt.title(eluent,font1)
+    plt.tight_layout()
+    plt.ylim(-0.1, 1.1)
+    return fig
 if __name__=='__main__':
     input: smiles of one compound, such as CC(OCC)=O,   and the ratio of five solvents, example: 20 1 0 0 0 for PE:EA=20:1\n
     output: the predicted Rf value.\n\n
     ## Citation\n
+    We would appreciate it if you use our software and give us credit in the acknowledgements section of your paper:\n
+    we use RF prediction software in our synthesis work. [Citation 1, Citation 2]
     Citation1: H. Xu, J. Lin, Q. Liu, Y. Chen, J. Zhang, Y. Yang, M.C. Young, Y. Xu, D. Zhang, F. Mo
     High-throughput discovery of chemical structure-polarity relationships combining automation and machine-learning techniques
     Chem (2022), 3202–3214, 10.1016/j.chempr.2022.08.008\n
     Citation2: https://huggingface.co/spaces/woshixuhao/Rf_compare
     ## Function\n
+    Single predict: predict a compound under a given eluent system\n
+    Batch predict: Upload a .csv file with multiple conditions to conduct batch prediction\n
+    Rf compare: predict Rf values of two compounds under different eluents in TLC
     """
         gr.Markdown(model_card)
         with gr.Tab("Single prediction"):
             gr.Interface(fn=predict_single, inputs=["text", "number","number","number","number","number"], outputs='number')
+        with gr.Tab("Batch prediction"):
             gr.Interface(fn=predict_xlsx,description='please upload a .csv file formatted in the form of the example', inputs="file", outputs="file",examples=[os.path.join(os.path.dirname(__file__),"TLC_1.csv")],cache_examples=True)
+        with gr.Tab("Rf compare")
+            gr.Interface(fn=predict_compare, inputs=["text", "text"], outputs=["image","image","image"],
+                         description='input: smiles of two compounds, such as CC(OCC)=O and CC(C)CCC(NC(C)C)=O\n output: three images that show the Rf curve with different eluent ratios under PE/EA, DCM/MeOH, PE/Et2O system.\n\n')
     demo.launch()
     # smile='O=C(OC1C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)O1)C'
     # eluent=[0,0.9,0,0,0]