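"""Gradio demo: protein-protein interaction (PPI) prediction with MAMMAL.

Given two protein sequences, the app builds a prompt in the model's
pre-training syntax, runs the ibm/biomed.omics.bl.sm.ma-ted-458m model,
and reports the decoded output together with a PPI score.
"""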
import gradio as gr
import torch
from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
from mammal.model import Mammal
from mammal.keys import *  # provides ENCODER_INPUTS_STR, ENCODER_INPUTS_TOKENS, ENCODER_INPUTS_ATTENTION_MASK, CLS_PRED

model_path = "ibm/biomed.omics.bl.sm.ma-ted-458m"
# Load model
model = Mammal.from_pretrained(model_path)
model.eval()

# Load tokenizer
tokenizer_op = ModularTokenizerOp.from_pretrained(model_path)

# Token id of "<1>", the class token for a positive (interacting) prediction
positive_token_id = tokenizer_op.get_token_id("<1>")

# Default input proteins
protein_calmodulin = "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK"
protein_calcineurin = "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ"

def format_prompt(prot1, prot2):
    # Format the prompt to match the pre-training task syntax
    return f"<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0><MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN><SEQUENCE_NATURAL_START>{prot1}<SEQUENCE_NATURAL_END><MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN><SEQUENCE_NATURAL_START>{prot2}<SEQUENCE_NATURAL_END><EOS>"

def run_prompt(prompt):
    # Build a sample dict holding the raw prompt string
    sample_dict = dict()
    sample_dict[ENCODER_INPUTS_STR] = prompt

    # Tokenize the prompt into token ids and an attention mask
    sample_dict = tokenizer_op(
        sample_dict=sample_dict,
        key_in=ENCODER_INPUTS_STR,
        key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
        key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
    )
    sample_dict[ENCODER_INPUTS_TOKENS] = torch.tensor(sample_dict[ENCODER_INPUTS_TOKENS])
    sample_dict[ENCODER_INPUTS_ATTENTION_MASK] = torch.tensor(sample_dict[ENCODER_INPUTS_ATTENTION_MASK])

    # Generate the prediction
    batch_dict = model.generate(
        [sample_dict],
        output_scores=True,
        return_dict_in_generate=True,
        max_new_tokens=5,
    )

    # Decode the generated tokens and take the score assigned to the
    # positive-binding token "<1>" at the position of the affinity class
    generated_output = tokenizer_op._tokenizer.decode(batch_dict[CLS_PRED][0])
    score = batch_dict["model.out.scores"][0][1][positive_token_id].item()
    return generated_output, score
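
# A minimal sketch of calling the pipeline without the UI (illustrative;
# the exact decoded string depends on the pretrained model):
#
#   prompt = format_prompt(protein_calmodulin, protein_calcineurin)
#   decoded, score = run_prompt(prompt)
#   # `decoded` should contain the binding-affinity class, <1> for
#   # interacting or <0> for non-interacting; `score` is the model's
#   # score for the <1> token.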

def create_and_run_prompt(prot1, prot2):
    # Return the prompt itself along with the decoded output and PPI score
    prompt = format_prompt(prot1, prot2)
    return (prompt, *run_prompt(prompt=prompt))

def create_application():
    markup_text = f"""
# MAMMAL-based Protein-Protein Interaction (PPI) demonstration

Given two protein sequences, estimate whether the proteins interact.

### Using the model from
```{model_path} ```
"""
    with gr.Blocks() as demo:
        gr.Markdown(markup_text)
        with gr.Row():
            prot1 = gr.Textbox(
                label="Protein 1 sequence",
                interactive=True,
                lines=1,
                value=protein_calmodulin,
            )
            prot2 = gr.Textbox(
                label="Protein 2 sequence",
                interactive=True,
                lines=1,
                value=protein_calcineurin,
            )
        with gr.Row():
            run_mammal = gr.Button(
                "Run MAMMAL prompt for Protein-Protein Interaction",
                variant="primary",
            )
        with gr.Row():
            prompt_box = gr.Textbox(label="MAMMAL prompt", lines=5)
        with gr.Row():
            decoded = gr.Textbox(label="MAMMAL output")
        with gr.Row():
            # Name the score component instead of creating it inline in the
            # click() outputs, so its position in the layout is explicit
            ppi_score = gr.Number(label="PPI score")
        run_mammal.click(
            fn=create_and_run_prompt,
            inputs=[prot1, prot2],
            outputs=[prompt_box, decoded, ppi_score],
        )
        with gr.Row():
            gr.Markdown(
                "```<SENTINEL_ID_0>``` contains the binding affinity class, "
                "which is ```<1>``` for interacting and ```<0>``` for non-interacting."
            )
    return demo

def main():
    demo = create_application()
    demo.launch(show_error=True, share=True)


if __name__ == "__main__":
    main()