omarperacha committed
Commit 8e0bbdf · Parent: cfdf2d4

test submodule
.gitignore ADDED
@@ -0,0 +1,2 @@
+.DS_Store
+.idea/
.gitmodules ADDED
@@ -0,0 +1,3 @@
+[submodule "mega"]
+	path = mega
+	url = https://github.com/facebookresearch/mega
README.md CHANGED
@@ -1,12 +1,12 @@
 ---
 title: Protein Secondary Structure Prediction
-emoji: 🐨
-colorFrom: yellow
-colorTo: indigo
+emoji: 🧬
+colorFrom: purple
+colorTo: blue
 sdk: gradio
 sdk_version: 3.23.0
 app_file: app.py
-pinned: false
+pinned: true
 license: cc-by-4.0
 ---
 
app.py ADDED
@@ -0,0 +1,11 @@
+import gradio as gr
+from ps4_models.classifiers import *
+
+
+def pred(seq):
+    model = PS4_Mega()  # instantiating the model exercises the mega submodule import; output is still a placeholder
+    return "Hello " + seq + "!!"
+
+
+iface = gr.Interface(fn=pred, inputs=gr.Textbox(label="amino acid sequence"), outputs=gr.Textbox(label="secondary structure sequence"))
+iface.launch()
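The placeholder body hints at the intended flow: embed the residues, run PS4_Mega, and map class indices back to DSSP8 symbols. A minimal sketch of that flow, assuming a hypothetical `embed(seq)` helper that returns ProtT5 embeddings of shape (1 x L x 1024) and a hypothetical `SS8` alphabet; neither exists in this commit:

```python
import torch

SS8 = "GHIBESTC"  # hypothetical DSSP8 alphabet ordering; the commit does not fix one


def pred_full(seq, model, embed):
    # embed(seq) -> (1 x L x 1024) ProtT5 embeddings (assumed helper, not in the commit)
    with torch.no_grad():
        logits = model(embed(seq))          # (1 x L x SS_CLASSES)
    idx = logits.argmax(dim=-1).squeeze(0)  # (L,) predicted class indices
    return "".join(SS8[i] for i in idx.tolist())
```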
mega ADDED
@@ -0,0 +1 @@
+Subproject commit aeaa4b44592cd1d60a9a34554e359eda2a62b03b
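ps4_models/classifiers.py imports MegaEncoderLayer from this checkout, so a fresh clone needs the submodule populated (`git submodule update --init mega`). A hedged guard one could place before that import; it is not part of the commit:

```python
from pathlib import Path

# Fail fast with a clear message if the mega submodule was never initialised.
if not Path("mega/fairseq/modules/mega_layer.py").is_file():
    raise RuntimeError("mega submodule missing; run `git submodule update --init mega`")
```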
ps4_models/Conv/PS4-Conv_epoch-5_loss-0.652_acc-77.905.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f111dae67ca7296e2463ccf12edde124293e6ddc4ac6e69db103e29137bc579
+size 19396615
ps4_models/Mega/PS4-Mega_loss-0.633_acc-78.176.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17b70fea89014bead5c6bf180a8ca3573a84eb336cf194d9c3c0fc1dd70f49cd
+size 335365083
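Both .pt entries are Git LFS pointers, so the real tensors only exist locally after `git lfs pull`. A loading sketch under the assumption that each file stores a plain state_dict (the commit does not show the save code):

```python
import torch
from ps4_models.classifiers import PS4_Mega

model = PS4_Mega()
# Assumes the checkpoint is a raw state_dict; adjust if it wraps extra metadata.
state = torch.load("ps4_models/Mega/PS4-Mega_loss-0.633_acc-78.176.pt", map_location="cpu")
model.load_state_dict(state)
model.eval()
```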
ps4_models/classifiers.py ADDED
@@ -0,0 +1,143 @@
+import torch
+import torch.nn as nn
+from ps4_data.utils import SS_CLASSES
+from mega.fairseq.modules.mega_layer import MegaEncoderLayer
+
+
+class PS4_Conv(torch.nn.Module):
+    def __init__(self):
+        super(PS4_Conv, self).__init__()
+        # This is only called "elmo_feature_extractor" for historical reasons;
+        # the CNN weights are trained on ProtT5 embeddings
+        self.elmo_feature_extractor = torch.nn.Sequential(
+            torch.nn.Conv2d(1024, 512, kernel_size=(7, 1), padding=(3, 0)),  # 7x512
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1),
+            torch.nn.Conv2d(512, 256, kernel_size=(7, 1), padding=(3, 0)),  # 7x256
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1),
+            torch.nn.Conv2d(256, 128, kernel_size=(7, 1), padding=(3, 0)),  # 7x128
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1),
+            torch.nn.Conv2d(128, 32, kernel_size=(7, 1), padding=(3, 0)),  # 7x32
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1)
+        )
+        n_final_in = 32
+
+        self.dssp8_classifier = torch.nn.Sequential(
+            torch.nn.Conv2d(n_final_in, 8, kernel_size=(7, 1), padding=(3, 0))
+        )
+
+    def forward(self, x):
+        # IN: x = (B x L x F); OUT: (B x F x L x 1)
+        x = x.permute(0, 2, 1).unsqueeze(dim=-1)
+        x = self.elmo_feature_extractor(x)  # OUT: (B x 32 x L x 1)
+        d8_yhat = self.dssp8_classifier(x).squeeze(dim=-1).permute(0, 2, 1)  # OUT: (B x L x 8)
+
+        return d8_yhat
+
+
+class PS4_Mega(nn.Module):
+    def __init__(self, nb_layers=11, l_aux_dim=1024, model_parallel=False,
+                 h_dim=1024, batch_size=1, seq_len=1, dropout=0.0):
+        super(PS4_Mega, self).__init__()
+
+        self.nb_layers = nb_layers
+        self.h_dim = h_dim
+        self.batch_size = batch_size
+        self.seq_len = seq_len
+        self.dropout = dropout
+        self.aux_emb_size_l = l_aux_dim
+        self.input_size = l_aux_dim
+
+        self.args = ArgHolder(emb_dim=self.input_size, dropout=dropout, hdim=h_dim)
+
+        self.nb_tags = SS_CLASSES
+
+        self.model_parallel = model_parallel
+
+        # build actual NN
+        self.__build_model()
+
+    def __build_model(self):
+
+        # design sequence-processing module
+
+        megas = []
+        for i in range(self.nb_layers):
+            mega = MegaEncoderLayer(self.args)
+            megas.append(mega)
+
+        self.seq_unit = MegaSequence(*megas)
+
+        self.dropout_i = nn.Dropout(max(0.0, self.dropout - 0.2))
+
+        # output layer which projects back to tag space
+        out_dim = self.input_size
+
+        self.hidden_to_tag = nn.Linear(out_dim, self.nb_tags, bias=False)
+
+    def init_hidden(self):
+        # the weights are of the form (nb_layers, batch_size, nb_rnn_units)
+        hidden_a = torch.randn(self.nb_layers, self.batch_size, self.aux_emb_size_l)
+
+        if torch.cuda.is_available():
+            hidden_a = hidden_a.cuda()
+
+        return hidden_a
+
+    def forward(self, r):
+
+        self.seq_len = r.shape[1]
+
+        # residue encoding; view() is safe here because batch_size is fixed at 1
+        R = r.view(self.seq_len, self.batch_size, self.aux_emb_size_l)
+
+        X = self.dropout_i(R)
+
+        # run through MEGA
+        X = self.seq_unit(X, encoder_padding_mask=None)
+        X = X.view(self.batch_size, self.seq_len, self.input_size)
+
+        # run through linear layer
+        X = self.hidden_to_tag(X)
+
+        Y_hat = X
+        return Y_hat
+
+
+class MegaSequence(nn.Sequential):
+    def forward(self, input, **kwargs):
+        for module in self:
+            options = kwargs if isinstance(module, MegaEncoderLayer) else {}
+            input = module(input, **options)
+        return input
+
+
+class ArgHolder(object):
+    def __init__(self, hdim=512, dropout=0.1, emb_dim=1024):
+        super(ArgHolder, self).__init__()
+
+        self.encoder_embed_dim = emb_dim
+        self.encoder_hidden_dim = hdim
+        self.dropout = dropout
+        self.encoder_ffn_embed_dim = 1024
+        self.ffn_hidden_dim: int = 1024
+        self.encoder_z_dim: int = 128
+        self.encoder_n_dim: int = 16
+        self.activation_fn: str = 'silu'
+        self.attention_activation_fn: str = 'softmax'
+        self.attention_dropout: float = 0.0
+        self.activation_dropout: float = 0.0
+        self.hidden_dropout: float = 0.0
+        self.encoder_chunk_size: int = -1
+        self.truncation_length = None
+        self.rel_pos_bias: str = 'simple'
+        self.max_source_positions: int = 2048
+        self.normalization_type: str = 'layernorm'
+        self.normalize_before: bool = False
+        self.feature_dropout: bool = False
+
+
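Both classifiers consume (B x L x F) embeddings, so a quick shape check makes the data flow concrete. A minimal smoke-test sketch, not part of the commit; it assumes the mega submodule is importable and that SS_CLASSES (imported from ps4_data.utils) is an integer class count:

```python
import torch
from ps4_models.classifiers import PS4_Conv, PS4_Mega

x = torch.randn(1, 64, 1024)  # (B x L x F): one sequence of 64 ProtT5-sized embeddings

conv_out = PS4_Conv()(x)
print(conv_out.shape)  # torch.Size([1, 64, 8])

mega_out = PS4_Mega(nb_layers=2)(x)  # fewer layers than the default 11, to keep the test light
print(mega_out.shape)  # (1, 64, SS_CLASSES)
```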
requirements.txt ADDED
@@ -0,0 +1,9 @@
+torch~=1.9.0
+fairseq~=0.9.0
+numpy~=1.21.1
+scikit-learn~=0.24.2
+transformers~=4.26.1
+setuptools~=57.4.0
+pandas~=1.3.2
+wget~=3.2
+gradio~=3.23.0