matanninio committed on
Commit
ac117b5
1 Parent(s): 57bef8f

initial version

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +111 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Biomed-multi-alignment Protein-Protein-Interaction
3
- emoji: 🐠
4
  colorFrom: gray
5
  colorTo: purple
6
  sdk: gradio
 
1
  ---
2
  title: Biomed-multi-alignment Protein-Protein-Interaction
3
+ emoji: 🐁
4
  colorFrom: gray
5
  colorTo: purple
6
  sdk: gradio
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ import torch
4
+ from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
5
+ from mammal.model import Mammal
6
+ from mammal.keys import *
7
+
8
+
9
+
10
# Identifier of the pretrained checkpoint; the same value is passed to both
# the model and the tokenizer loaders below.
model_path = "ibm/biomed.omics.bl.sm.ma-ted-400m"

# Load the model once at import time and switch it to evaluation mode
model = Mammal.from_pretrained(model_path)
model.eval()

# Load the tokenizer from the same checkpoint
tokenizer_op = ModularTokenizerOp.from_pretrained(model_path)

# Id of the "<1>" token, which stands for positive binding
positive_token_id = tokenizer_op.get_token_id("<1>")

# Default input proteins, used to pre-fill the two sequence text boxes
protein_calmodulin = "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK"
protein_calcineurin = "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ"
24
+
25
+
26
def format_query(prot1, prot2):
    """Build the binding-affinity prompt for a pair of protein sequences.

    The prompt follows the syntax used in pre-training: a task header,
    then each sequence wrapped in its molecular-entity markers, then
    ``<EOS>``.

    Args:
        prot1: Amino-acid sequence of the first protein.
        prot2: Amino-acid sequence of the second protein.

    Returns:
        The fully formatted prompt string.
    """
    task_header = "<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0>"
    entity_open = (
        "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
        "<SEQUENCE_NATURAL_START>"
    )
    entity_close = "<SEQUENCE_NATURAL_END>"

    # Both proteins are wrapped identically, in argument order.
    entities = "".join(
        f"{entity_open}{sequence}{entity_close}" for sequence in (prot1, prot2)
    )
    return f"{task_header}{entities}<EOS>"
29
+
30
def run_query(query):
    """Tokenize a prompt, run generation, and return the decoded output and score.

    Args:
        query: Prompt string to feed to the model's encoder.

    Returns:
        A ``(generated_output, score)`` tuple. ``generated_output`` is the
        decoded text of the generated token ids. ``score`` is a plain Python
        number read from the generation scores at the entry for
        ``positive_token_id``.
    """
    # Create and load sample
    sample_dict = {ENCODER_INPUTS_STR: query}

    # Tokenize
    sample_dict = tokenizer_op(
        sample_dict=sample_dict,
        key_in=ENCODER_INPUTS_STR,
        key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
        key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
    )
    # The tokenizer output is converted to tensors before generation.
    sample_dict[ENCODER_INPUTS_TOKENS] = torch.tensor(
        sample_dict[ENCODER_INPUTS_TOKENS]
    )
    sample_dict[ENCODER_INPUTS_ATTENTION_MASK] = torch.tensor(
        sample_dict[ENCODER_INPUTS_ATTENTION_MASK]
    )

    # Generate Prediction
    batch_dict = model.generate(
        [sample_dict],
        output_scores=True,
        return_dict_in_generate=True,
        max_new_tokens=5,
    )

    # Get output
    generated_output = tokenizer_op._tokenizer.decode(batch_dict[CLS_PRED][0])

    # `.item()` must be *called*: the bare attribute is a bound method, not a
    # number, and the caller feeds this value into a numeric UI field.
    # NOTE(review): index [0] selects the single sample in the batch; index [1]
    # is presumably the generated position holding the class token - confirm.
    score = batch_dict["model.out.scores"][0][1][positive_token_id].item()

    return generated_output, score
60
+
61
def create_and_run_query(prot1, prot2):
    """Format a prompt for two proteins, run it, and return all results.

    Args:
        prot1: Amino-acid sequence of the first protein.
        prot2: Amino-acid sequence of the second protein.

    Returns:
        A tuple whose first element is the formatted query, followed by
        every value returned by ``run_query`` for that query.
    """
    query = format_query(prot1, prot2)
    model_outputs = run_query(query=query)
    # Prepend the prompt so the UI can show exactly what was sent to the model.
    return (query, *model_outputs)
65
+
66
def create_application():
    """Assemble the Gradio Blocks UI for the protein-binding demo.

    The page shows a header naming the model, two text boxes for the
    protein sequences, a run button, and fields for the generated query,
    the decoded model output and the binding score.

    Returns:
        The constructed ``gr.Blocks`` application (not launched).
    """
    # NOTE(review): the nesting of the `with` blocks and the indentation
    # inside the markdown string were reconstructed from a flattened source;
    # confirm against the original layout.
    header_md = f"""
    # Mammal protein binding demonstration
    ### Using the model from

    ```{model_path} ```
    """

    with gr.Blocks() as demo:
        gr.Markdown(header_md)

        # Inputs: one editable single-line box per protein, pre-filled
        # with the default sequences.
        with gr.Row():
            prot1 = gr.Textbox(
                label="Protein 1 sequence",
                interactive=True,
                lines=1,
                value=protein_calmodulin,
            )
            prot2 = gr.Textbox(
                label="Protein 2 sequence",
                interactive=True,
                lines=1,
                value=protein_calcineurin,
            )

        with gr.Row():
            run_button = gr.Button("Run Mammal query")

        # Outputs: the prompt that was sent, then the decoded answer.
        with gr.Row():
            query_view = gr.Textbox(label="Mammal query", lines=5)

        with gr.Row():
            decoded_view = gr.Textbox(label="Mammal output")
            # The score field is created inline, as part of the outputs list.
            run_button.click(
                fn=create_and_run_query,
                inputs=[prot1, prot2],
                outputs=[
                    query_view,
                    decoded_view,
                    gr.Number(label='binding score'),
                ],
            )
    return demo
104
+
105
def main():
    """Build the demo application and launch it.

    The app is launched with error display enabled and ``share=True``.
    """
    application = create_application()
    application.launch(show_error=True, share=True)


# Only start the app when this file is executed as a script.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # for the mammal demo app
2
+ mammal @ git+https://github.com/BiomedSciAI/biomed-multi-alignment.git