M3Site / app.py
anonymousforpaper's picture
Upload 103 files
224a33f verified
import copy
import gradio as gr
from gradio_molecule3d import Molecule3D
import Bio
import Bio.SeqUtils
from utils.util_functions import merge_ranges
from predict import model_predict
from constants import *
def _lower_first(label):
    """Return *label* with only its first character lower-cased.

    Slicing (instead of indexing) means an empty string is returned
    unchanged rather than raising ``IndexError``.
    """
    return label[:1].lower() + label[1:]


def _build_rep(style, color, residue_range):
    """Return a fresh representation dict for one residue range.

    The dict is a deep copy of ``default_reps[0]`` with its ``style``,
    ``color`` and ``residue_range`` entries overwritten, so the shared
    template is never mutated.
    """
    rep = copy.deepcopy(default_reps[0])
    rep['style'] = _lower_first(style)
    # Colours are passed on as "<name>Carbon" with a lower-case first letter.
    rep['color'] = _lower_first(color) + "Carbon"
    rep['residue_range'] = residue_range
    return rep


def _confidence_row(site_label, residue):
    """Return one result-table row for a residue detail dict."""
    return [
        site_label,
        Bio.SeqUtils.seq3(residue['residue_type']).upper(),
        residue['number'],
        residue['confidence'],
    ]


def update_reps_based_on_radio(*args):
    """Run the site prediction and build the three values shown in the UI.

    Positional arguments, in the order they are unpacked below:
        args[0]: structure input, forwarded to ``model_predict``.
        args[1]: function-prompt text, forwarded to ``model_predict``.
        args[2], args[3]: background style name and colour name.
        args[4]: model selection, forwarded to ``model_predict``.
        args[5:]: alternating style/colour names; the pair at positions
            ``(2n, 2n + 1)`` is applied to the site class keyed ``str(n)``.

    Returns:
        A tuple ``(summary_text, confidence_details, viewer)``:
        ``summary_text`` is a ``'; '``-joined string with one entry per
        class that has predicted sites (or a fixed "none found" message),
        ``confidence_details`` is a list of
        ``[site type, residue code, residue number, confidence]`` rows with
        background residues first, and ``viewer`` is a ``Molecule3D``
        carrying one representation per merged residue range.

    Raises:
        IndexError: if fewer than five arguments are given or ``args[5:]``
            has an odd length.
        KeyError: if ``predicted_sites`` holds a key other than
            ``'b'`` or ``'0'``..``'5'``.
    """
    struct, text = args[0], args[1]
    background, model, active_sites = args[2:4], args[4], args[5:]

    # NOTE(review): predicted_sites is used as a mapping from class key to
    # 1-based residue numbers, and confs as a per-residue sequence indexed
    # from 0 — confirm against model_predict.
    predicted_sites, confs, sequence = model_predict(model, struct, text)
    merged_sites = merge_ranges(predicted_sites, max_value=len(sequence))

    # 1. Summary: one entry per class with at least one predicted site.
    # len() is used (not truthiness) so array-like values keep working.
    summary_parts = [
        f"{len(sites)} {no_cat_dict[key]} site(s)"
        for key, sites in predicted_sites.items()
        if len(sites) > 0
    ]
    if not summary_parts:
        summary_parts = ["No active sites identified."]
    summary_text = '; '.join(summary_parts)

    # 2. Per-residue details, grouped by class key ('b' is background).
    detail_predicted_sites = {
        key: [] for key in ('b', '0', '1', '2', '3', '4', '5')
    }
    assigned = set()  # residue numbers claimed by any class; O(1) lookups
    for key, sites in predicted_sites.items():
        for number in sites:
            detail_predicted_sites[key].append(
                {
                    'residue_type': sequence[number - 1],
                    'number': number,
                    'confidence': confs[number - 1],
                }
            )
            # Normalise to a plain int so hashing matches the int loop below.
            assigned.add(int(number))
    # Every residue not claimed above is reported as background.
    for number in range(1, len(sequence) + 1):
        if number not in assigned:
            detail_predicted_sites['b'].append(
                {
                    'residue_type': sequence[number - 1],
                    'number': number,
                    'confidence': confs[number - 1],
                }
            )

    # 2.1 Background rows come first in the table.
    confidence_details = [
        _confidence_row('Background', residue)
        for residue in detail_predicted_sites['b']
    ]
    # 2.2 Active-site rows, in the order of the style/colour pairs.
    for i in range(0, len(active_sites), 2):
        site_key = str(i // 2)
        for residue in detail_predicted_sites.get(site_key, []):
            confidence_details.append(
                _confidence_row(no_cat_dict[site_key], residue)
            )

    # 3. Viewer representations.
    # 3.1 Background ranges use the background style/colour.
    new_reps = [
        _build_rep(background[0], background[1], residue_range)
        for residue_range in merged_sites['b']
    ]
    # 3.2 Each active-site class uses its own style/colour pair.
    for i in range(0, len(active_sites), 2):
        style, color = active_sites[i], active_sites[i + 1]
        for residue_range in merged_sites[str(i // 2)]:
            new_reps.append(_build_rep(style, color, residue_range))

    return (
        summary_text,
        confidence_details,
        Molecule3D(label="Identified Functional Sites", reps=new_reps),
    )
def disable_fn(*x):
    """Return one ``interactive=False`` update per argument received.

    The argument values themselves are ignored; only their count is used.
    """
    locked = gr.update(interactive=False)
    return [locked for _ in x]
def able_tip():
    """Return an update that makes the receiving component visible."""
    reveal = gr.update(visible=True)
    return reveal
def check_input(input):
    """Return an update that is interactive only when *input* is not ``None``."""
    has_value = input is not None
    return gr.update(interactive=has_value)
# Build the page: intro text, the settings accordion, the input/output
# viewers, the prompt + Predict button, the result widgets, and the event
# wiring for the button.
# NOTE(review): the nesting of the Accordion/Row containers below was
# reconstructed from a copy of this file that had lost its indentation —
# confirm it against the intended layout.
with gr.Blocks(title="M3Site-app", theme=gr.themes.Default()) as demo:
    gr.Markdown("# M<sup>3</sup>Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification")
    gr.Markdown("""
## Overview
**M<sup>3</sup>Site** is an advanced tool designed to accurately identify and classify protein active sites using a multi-modal learning approach. By integrating protein sequences, structural data, and functional annotations, M<sup>3</sup>Site provides comprehensive insights into protein functionality, aiding in drug design, synthetic biology, and understanding protein mechanisms.
""")
    gr.Markdown("""
## How to Use
1. **Select the Model**: Choose the pre-trained model for site prediction from the dropdown list.
2. **Adjust Visual Settings**: Customize the visual style and color for background and active sites.
3. **Upload Protein Structure**: Provide the 3D structure of the protein. You can upload from local or download from PDB Assym. Unit, PDB BioAssembly, AlphaFold DB, or ESMFold DB.
4. **Enter Function Prompt**: Optionally provide a text description of the protein's function. If unsure, leave it blank.
5. **Click "Predict"**: Hit the 'Predict' button to initiate the prediction. The predicted active sites will be highlighted in the structure visualization.
6. **View Results**: The detailed results will be displayed below, including the identified active sites, their types, and confidence scores.
""")
    # Settings: model choice plus one style/colour dropdown pair for the
    # background and for each of the six site classes.
    with gr.Accordion("General Settings (Set before prediction)"):
        with gr.Row():
            model_drop = gr.Dropdown(model_list, label="Model Selection", value=model_list[0])
            # Empty Markdown components; presumably layout spacers.
            gr.Markdown("")
            gr.Markdown("")
        with gr.Row():
            with gr.Row():
                style_dropb = gr.Dropdown(style_list, label="Style (Background)", value=style_list[0], min_width=1)
                color_dropb = gr.Dropdown(color_list, label="Color (Background)", value=color_list[0], min_width=1)
            with gr.Row():
                style_drop1 = gr.Dropdown(style_list, label="Style (CRI)", value=style_list[1], min_width=1)
                color_drop1 = gr.Dropdown(color_list, label="Color (CRI)", value=color_list[1], min_width=1)
            with gr.Row():
                style_drop2 = gr.Dropdown(style_list, label="Style (SCI)", value=style_list[1], min_width=1)
                color_drop2 = gr.Dropdown(color_list, label="Color (SCI)", value=color_list[2], min_width=1)
            with gr.Row():
                style_drop3 = gr.Dropdown(style_list, label="Style (PI)", value=style_list[1], min_width=1)
                color_drop3 = gr.Dropdown(color_list, label="Color (PI)", value=color_list[3], min_width=1)
        with gr.Row():
            with gr.Row():
                style_drop4 = gr.Dropdown(style_list, label="Style (PTCR)", value=style_list[1], min_width=1)
                color_drop4 = gr.Dropdown(color_list, label="Color (PTCR)", value=color_list[4], min_width=1)
            with gr.Row():
                style_drop5 = gr.Dropdown(style_list, label="Style (IA)", value=style_list[1], min_width=1)
                color_drop5 = gr.Dropdown(color_list, label="Color (IA)", value=color_list[5], min_width=1)
            with gr.Row():
                style_drop6 = gr.Dropdown(style_list, label="Style (SSA)", value=style_list[1], min_width=1)
                color_drop6 = gr.Dropdown(color_list, label="Color (SSA)", value=color_list[6], min_width=1)
            with gr.Row():
                # Empty Markdown component; presumably pads this row to the
                # same number of cells as the row above.
                gr.Markdown("")
        gr.Markdown('''
*NOTE:* CRI indicates Covalent Reaction Intermediates, SCI indicates Sulfur-containing Covalent Intermediates, PI indicates Phosphorylated Intermediates,
PTCR indicates Proton Transfer & Charge Relay Systems, IA indicates Isomerization Activity, SSA indicates Substrate-specific Activities.
''')
    # Column headings for the two structure viewers.
    with gr.Row():
        gr.Markdown("<center><font size=5><b>Input Structure</b></font></center>")
        gr.Markdown("<center><font size=5><b>Output Predictions</b></font></center>")
    with gr.Row(equal_height=True):
        input_struct = Molecule3D(label="Input Protein Structure (Default Style)", reps=reps1)
        output_struct = Molecule3D(label="Output Protein Structure", reps=[])
    with gr.Row(equal_height=True):
        input_text = gr.Textbox(lines=1, label="Function Prompt", scale=16, min_width=1, placeholder="I don't know the function of this protein.")
        # Created non-interactive; the input_struct.change handler below
        # toggles it via check_input.
        btn = gr.Button("Predict", variant="primary", scale=1, min_width=1, interactive=False)
        # NOTE(review): placed in this row because it sets `scale` like its
        # neighbours — confirm.
        summary_output = gr.Label(label="", scale=18, min_width=1, show_label=False, elem_classes="info")
    gr.Markdown("### Result Details")
    confidence_output = gr.DataFrame(headers=["Active Site Type", "Residue Type", "Residue Number", "Confidence"])
    # Order matters: update_reps_based_on_radio unpacks these positionally as
    # background style, background colour, model, then style/colour pairs.
    option_list = [
        style_dropb, color_dropb, model_drop,
        style_drop1, color_drop1,
        style_drop2, color_drop2,
        style_drop3, color_drop3,
        style_drop4, color_drop4,
        style_drop5, color_drop5,
        style_drop6, color_drop6
    ]
    # Hidden at start; made visible by able_tip when Predict is clicked.
    tips = gr.Markdown("### *Tips: Please refresh the page to make a new prediction.*", visible=False)
    # Citation section, currently disabled:
    # gr.Markdown("## Citation")
    # gr.Markdown("If you find this tool helpful, please consider citing the following papers:")
    # with gr.Accordion("Citations", open=False):
    # gr.Markdown('''```
    # @inproceedings{ouyangmmsite,
    # title={MMSite: A Multi-modal Framework for the Identification of Active Sites in Proteins},
    # author={Ouyang, Song and Cai, Huiyu and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
    # booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems}
    # }
    # @article{ouyangm3site,
    # title={M3Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Iden-tification and Classification},
    # author={Ouyang, Song and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
    # journal={xxxx},
    # year={xxxx},
    # }
    # ```''')
    # Bind events.
    # The Predict button's interactivity follows the structure input's value.
    input_struct.change(check_input, inputs=input_struct, outputs=btn)
    # Click chain, in order: show the refresh tip, disable every settings
    # dropdown, run the prediction into the summary/table/output viewer,
    # then pass the input structure value through to the output viewer.
    btn.click(
        fn=able_tip,
        inputs=[],
        outputs=tips
    ).then(
        fn=disable_fn,
        inputs=option_list,
        outputs=option_list
    ).then(
        fn=update_reps_based_on_radio,
        inputs=[input_struct, input_text] + option_list,
        outputs=[summary_output, confidence_output, output_struct]
    ).then(
        fn=lambda x: x,
        inputs=[input_struct],
        outputs=[output_struct]
    )
# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(share=True, debug=True)