import gradio as gr
import torch
from mammal.keys import (
    CLS_PRED,
    ENCODER_INPUTS_ATTENTION_MASK,
    ENCODER_INPUTS_STR,
    ENCODER_INPUTS_TOKENS,
)
from mammal.model import Mammal

from mammal_demo.demo_framework import MammalObjectBroker, MammalTask


class PpiTask(MammalTask):
    """Demo task estimating whether two protein sequences interact.

    The task builds a text prompt from two amino-acid sequences, tokenizes
    it, runs ``Mammal.generate`` and reports the decoded output together
    with the score of the positive (``<1>``) token.
    """

    def __init__(self, model_dict):
        """Set up the task name, description, example inputs and intro text.

        Args:
            model_dict: mapping from model name to its holder object; it is
                forwarded to the base class and later indexed by model name
                in ``create_and_run_prompt``.
        """
        super().__init__(name="Protein-Protein Interaction", model_dict=model_dict)
        self.description = "Protein-Protein Interaction (PPI)"
        # Default sequences pre-filled into the two input text boxes.
        self.examples = {
            "protein_calmodulin": "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK",
            "protein_calcineurin": "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ",
        }
        # Markdown shown at the top of the demo: the heading and the
        # description must sit on separate lines to render as heading + text.
        self.markup_text = f"""
# Mammal based {self.description} demonstration

Given two protein sequences, estimate if the proteins interact or not."""

    @staticmethod
    def positive_token_id(model_holder: MammalObjectBroker):
        """Return the id of the token that marks a positive interaction.

        Args:
            model_holder (MammalObjectBroker): holder exposing the tokenizer
                as ``tokenizer_op``.

        Returns:
            int: id of the ``<1>`` token.
        """
        return model_holder.tokenizer_op.get_token_id("<1>")

    def generate_prompt(self, prot1, prot2):
        """Format a prompt to match the pre-training syntax.

        Args:
            prot1 (str): sequence of protein number 1
            prot2 (str): sequence of protein number 2

        Returns:
            str: prompt
        """
        # NOTE(review): the empty string segments below look like special
        # tokens that were lost from this copy of the file (angle-bracket
        # tags stripped?). Confirm against the model's expected prompt
        # syntax before relying on this prompt.
        prompt = (
            "<@TOKENIZER-TYPE=AA>"
            + ""
            + f"{prot1}"
            + ""
            + f"{prot2}"
        )
        return prompt

    def crate_sample_dict(self, sample_inputs: dict, model_holder: MammalObjectBroker):
        """Build a tokenized sample dictionary from the two input sequences.

        The method name (including its spelling) is kept unchanged so that
        existing callers keep working.

        Args:
            sample_inputs (dict): keyword arguments for ``generate_prompt``,
                i.e. ``{"prot1": ..., "prot2": ...}``.
            model_holder (MammalObjectBroker): holder exposing the tokenizer
                as ``tokenizer_op``.

        Returns:
            dict: sample holding the prompt string under
            ``ENCODER_INPUTS_STR`` and the token ids / attention mask as
            ``torch`` tensors.
        """
        sample_dict = {}
        # Unpack by keyword: a single ``*`` on a dict would pass its KEYS
        # ("prot1", "prot2") instead of the protein sequences.
        sample_dict[ENCODER_INPUTS_STR] = self.generate_prompt(**sample_inputs)

        # Tokenize the prompt; the tokenizer writes its outputs into the dict.
        sample_dict = model_holder.tokenizer_op(
            sample_dict=sample_dict,
            key_in=ENCODER_INPUTS_STR,
            key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
            key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
        )

        # Convert the tokenizer outputs to tensors.
        for key in (ENCODER_INPUTS_TOKENS, ENCODER_INPUTS_ATTENTION_MASK):
            sample_dict[key] = torch.tensor(sample_dict[key])
        return sample_dict

    def run_model(self, sample_dict, model: Mammal):
        """Run generation on a single sample.

        Args:
            sample_dict (dict): sample produced by ``crate_sample_dict``.
            model (Mammal): model used for generation.

        Returns:
            The value returned by ``model.generate`` for a batch holding this
            one sample, with scores requested and at most 5 new tokens.
        """
        batch_dict = model.generate(
            [sample_dict],
            output_scores=True,
            return_dict_in_generate=True,
            max_new_tokens=5,
        )
        return batch_dict

    def decode_output(self, batch_dict, model_holder: MammalObjectBroker):
        """Extract the decoded text and the positive-class score.

        Args:
            batch_dict: output of ``run_model``.
            model_holder (MammalObjectBroker): holder exposing the tokenizer
                as ``tokenizer_op``.

        Returns:
            tuple: ``(generated_output, score)`` where ``generated_output`` is
            the decoded first prediction and ``score`` is the value stored
            for the positive token.
        """
        generated_output = model_holder.tokenizer_op._tokenizer.decode(
            batch_dict[CLS_PRED][0]
        )
        # First sample, index 1 of its scores, entry of the "<1>" token.
        # NOTE(review): index 1 is presumably the generated position that
        # carries the class token - confirm against the model output layout.
        positive_id = self.positive_token_id(model_holder)
        score = batch_dict["model.out.scores"][0][1][positive_id].item()
        return generated_output, score

    def create_and_run_prompt(self, model_name, protein1, protein2):
        """Build the prompt for two proteins, run the model, decode the result.

        Args:
            model_name: key into ``self.model_dict`` selecting the model holder.
            protein1 (str): sequence of protein number 1
            protein2 (str): sequence of protein number 2

        Returns:
            tuple: ``(prompt, generated_output, score)``.
        """
        model_holder = self.model_dict[model_name]
        sample_inputs = {"prot1": protein1, "prot2": protein2}
        sample_dict = self.crate_sample_dict(
            sample_inputs=sample_inputs, model_holder=model_holder
        )
        prompt = sample_dict[ENCODER_INPUTS_STR]
        batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
        res = prompt, *self.decode_output(batch_dict, model_holder=model_holder)
        return res

    def create_demo(self, model_name_widget: gr.component):
        """Build the Gradio widgets for this task.

        Args:
            model_name_widget: widget whose value is passed as the model name
                to ``create_and_run_prompt`` when the run button is clicked.

        Returns:
            gr.Group: the group holding the task widgets, created with
            ``visible`` set to ``False``.
        """
        with gr.Group() as demo:
            gr.Markdown(self.markup_text)
            with gr.Row():
                prot1 = gr.Textbox(
                    label="Protein 1 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calmodulin"],
                )
                prot2 = gr.Textbox(
                    label="Protein 2 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calcineurin"],
                )
            with gr.Row():
                run_mammal: gr.Button = gr.Button(
                    "Run Mammal prompt for Protein-Protein Interaction",
                    variant="primary",
                )
            with gr.Row():
                prompt_box = gr.Textbox(label="Mammal prompt", lines=5)

            with gr.Row():
                decoded = gr.Textbox(label="Mammal output")
                score_box = gr.Number(label="PPI score")
                # The three outputs match the tuple returned by
                # create_and_run_prompt: prompt, decoded text, score.
                run_mammal.click(
                    fn=self.create_and_run_prompt,
                    inputs=[model_name_widget, prot1, prot2],
                    outputs=[prompt_box, decoded, score_box],
                )
            with gr.Row():
                # NOTE(review): the leading empty code span looks like a
                # token name that was lost from this copy of the file -
                # confirm the intended text.
                gr.Markdown(
                    "`````` contains the binding affinity class, which is ```<1>``` for interacting and ```<0>``` for non-interacting"
                )
        demo.visible = False
        return demo