Spaces:

ibm
/

biomed-multi-alignment-Protein-Protein-Interaction

Running

App Files Files Community

SagiPolaczek committed on 19 days ago

Commit

ff01709

•

1 Parent(s): d61ac75

Upload 3 files

Browse files

Files changed (3) hide show

README.md +5 -4
app.py +126 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
-title: Biomed Multi Alignment Protein Protein Interaction
-emoji: 🌍
-colorFrom: indigo
 colorTo: purple
 sdk: gradio
-sdk_version: 5.5.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Biomed-multi-alignment Protein-Protein-Interaction
+emoji: 🐁
+colorFrom: gray
 colorTo: purple
 sdk: gradio
+sdk_version: 5.4.0
 app_file: app.py
 pinned: false
 license: apache-2.0
+short_description: Demo for MAMMAL approach Protein-Protein Interaction query
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import gradio as gr
+import torch
+from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
+from mammal.model import Mammal
+from mammal.keys import *
+# Model identifier passed to both from_pretrained() calls below, so the model
+# and its tokenizer are loaded from the same source.
+model_path = "ibm/biomed.omics.bl.sm.ma-ted-458m"
+# Load the model once at module level; run_prompt() reuses this instance.
+model = Mammal.from_pretrained(model_path)
+# NOTE(review): presumably switches the model to evaluation/inference mode
+# (torch-style eval()) -- confirm against the Mammal class.
+model.eval()
+# Load the tokenizer that matches the model
+tokenizer_op = ModularTokenizerOp.from_pretrained(model_path)
+# Id of the "<1>" token; run_prompt() reads the score stored at this index,
+# and the UI text describes "<1>" as the class for interacting proteins.
+positive_token_id = tokenizer_op.get_token_id("<1>")
+# Default sequences pre-filled in the two protein input text boxes
+protein_calmodulin = "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK"
+protein_calcineurin = "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ"
+def format_prompt(prot1, prot2):
+    # Formatting prompt to match pre-training syntax
+    return f"<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0><MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN><SEQUENCE_NATURAL_START>{prot1}<SEQUENCE_NATURAL_END><MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN><SEQUENCE_NATURAL_START>{prot2}<SEQUENCE_NATURAL_END><EOS>"
+def run_prompt(prompt):
+    # Create and load sample
+    sample_dict = dict()
+    sample_dict[ENCODER_INPUTS_STR] = prompt
+    # Tokenize
+    sample_dict = tokenizer_op(
+        sample_dict=sample_dict,
+        key_in=ENCODER_INPUTS_STR,
+        key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
+        key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
+    )
+    sample_dict[ENCODER_INPUTS_TOKENS] = torch.tensor(
+        sample_dict[ENCODER_INPUTS_TOKENS]
+    )
+    sample_dict[ENCODER_INPUTS_ATTENTION_MASK] = torch.tensor(
+        sample_dict[ENCODER_INPUTS_ATTENTION_MASK]
+    )
+    # Generate Prediction
+    batch_dict = model.generate(
+        [sample_dict],
+        output_scores=True,
+        return_dict_in_generate=True,
+        max_new_tokens=5,
+    )
+    # Get output
+    generated_output = tokenizer_op._tokenizer.decode(batch_dict[CLS_PRED][0])
+    score = batch_dict["model.out.scores"][0][1][positive_token_id].item()
+    return generated_output, score
+def create_and_run_prompt(prot1, prot2):
+    prompt = format_prompt(prot1, prot2)
+    res = prompt, *run_prompt(prompt=prompt)
+    return res
+def create_application():
+    markup_text = f"""
+# Mammal based Protein-Protein Interaction (PPI) demonstration
+Given two protein sequences, estimate if the proteins interact or not.
+### Using the model from
+ ```{model_path} ```
+"""
+    with gr.Blocks() as demo:
+        gr.Markdown(markup_text)
+        with gr.Row():
+            prot1 = gr.Textbox(
+                label="Protein 1 sequence",
+                # info="standard",
+                interactive=True,
+                lines=1,
+                value=protein_calmodulin,
+            )
+            prot2 = gr.Textbox(
+                label="Protein 2 sequence",
+                # info="standard",
+                interactive=True,
+                lines=1,
+                value=protein_calcineurin,
+            )
+        with gr.Row():
+            run_mammal = gr.Button(
+                "Run Mammal prompt for Protein-Protein Interaction", variant="primary"
+            )
+        with gr.Row():
+            prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
+        with gr.Row():
+            decoded = gr.Textbox(label="Mammal output")
+            run_mammal.click(
+                fn=create_and_run_prompt,
+                inputs=[prot1, prot2],
+                outputs=[prompt_box, decoded, gr.Number(label="PPI score")],
+            )
+        with gr.Row():
+            gr.Markdown(
+                "```<SENTINEL_ID_0>``` contains the binding affinity class, which is ```<1>``` for interacting and ```<0>``` for non-interacting"
+            )
+    return demo
+def main():
+    demo = create_application()
+    demo.launch(show_error=True, share=True)
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # for the mammal demo app
2	+ mammal @ git+https://github.com/BiomedSciAI/biomed-multi-alignment.git