Commit 8e0bbdf
Parent(s): cfdf2d4
test submodule

Files changed:
- .gitignore +2 -0
- .gitmodules +3 -0
- README.md +4 -4
- app.py +11 -0
- mega +1 -0
- ps4_models/Conv/PS4-Conv_epoch-5_loss-0.652_acc-77.905.pt +3 -0
- ps4_models/Mega/PS4-Mega_loss-0.633_acc-78.176.pt +3 -0
- ps4_models/classifiers.py +143 -0
- requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+.DS_Store
+.idea/
.gitmodules ADDED
@@ -0,0 +1,3 @@
+[submodule "mega"]
+	path = mega
+	url = https://github.com/facebookresearch/mega
README.md CHANGED
@@ -1,12 +1,12 @@
 ---
 title: Protein Secondary Structure Prediction
-emoji:
-colorFrom:
-colorTo:
+emoji: 🧬
+colorFrom: purple
+colorTo: blue
 sdk: gradio
 sdk_version: 3.23.0
 app_file: app.py
-pinned:
+pinned: true
 license: cc-by-4.0
 ---
 
app.py ADDED
@@ -0,0 +1,11 @@
+import gradio as gr
+from ps4_models.classifiers import *
+
+
+def pred(seq):
+    model = PS4_Mega()
+    return "Hello " + seq + "!!"
+
+
+iface = gr.Interface(fn=pred, inputs="amino acid sequence", outputs="secondary structure sequence")
+iface.launch()
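
Note (not part of the commit): pred is still a placeholder, and "amino acid sequence" / "secondary structure sequence" do not appear to be valid Gradio 3.x component shortcuts for gr.Interface, so the app would likely fail at startup. A minimal sketch of the same placeholder wired with explicit Textbox components, keeping only the labels from the commit (everything else here is an assumption, not the authors' code):

import gradio as gr

def pred(seq):
    # Placeholder until the PS4 model pipeline is hooked up.
    return "Hello " + seq + "!!"

iface = gr.Interface(
    fn=pred,
    inputs=gr.Textbox(label="amino acid sequence"),
    outputs=gr.Textbox(label="secondary structure sequence"),
)
iface.launch()
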
mega ADDED
@@ -0,0 +1 @@
+Subproject commit aeaa4b44592cd1d60a9a34554e359eda2a62b03b
ps4_models/Conv/PS4-Conv_epoch-5_loss-0.652_acc-77.905.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f111dae67ca7296e2463ccf12edde124293e6ddc4ac6e69db103e29137bc579
+size 19396615
ps4_models/Mega/PS4-Mega_loss-0.633_acc-78.176.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17b70fea89014bead5c6bf180a8ca3573a84eb336cf194d9c3c0fc1dd70f49cd
+size 335365083
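
Note (not part of the commit): both .pt files are Git LFS pointer files; the actual weights (roughly 19 MB and 335 MB) are stored via LFS. A rough loading sketch, assuming each checkpoint holds a plain state_dict for the matching class in ps4_models/classifiers.py (the serialization format is not confirmed by this diff):

import torch
from ps4_models.classifiers import PS4_Mega

# Load the LFS-resolved checkpoint on CPU; the path is taken from the commit.
state = torch.load("ps4_models/Mega/PS4-Mega_loss-0.633_acc-78.176.pt", map_location="cpu")

model = PS4_Mega()
model.load_state_dict(state)  # assumes the file is a bare state_dict
model.eval()
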
ps4_models/classifiers.py ADDED
@@ -0,0 +1,143 @@
+import torch
+import torch.nn as nn
+from ps4_data.utils import SS_CLASSES
+from mega.fairseq.modules.mega_layer import MegaEncoderLayer
+
+
+class PS4_Conv(torch.nn.Module):
+    def __init__(self):
+        super(PS4_Conv, self).__init__()
+        # This is only called "elmo_feature_extractor" for historic reason
+        # CNN weights are trained on ProtT5 embeddings
+        self.elmo_feature_extractor = torch.nn.Sequential(
+            torch.nn.Conv2d(1024, 512, kernel_size=(7, 1), padding=(3, 0)),  # 7x512
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1),
+            torch.nn.Conv2d(512, 256, kernel_size=(7, 1), padding=(3, 0)),  # 7x256
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1),
+            torch.nn.Conv2d(256, 128, kernel_size=(7, 1), padding=(3, 0)),  # 7x128
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1),
+            torch.nn.Conv2d(128, 32, kernel_size=(7, 1), padding=(3, 0)),  # 7x32
+            torch.nn.ReLU(),
+            torch.nn.Dropout(0.1)
+        )
+        n_final_in = 32
+
+        self.dssp8_classifier = torch.nn.Sequential(
+            torch.nn.Conv2d(n_final_in, 8, kernel_size=(7, 1), padding=(3, 0))
+        )
+
+    def forward(self, x):
+        # IN: X = (B x L x F); OUT: (B x F x L, 1)
+        x = x.permute(0, 2, 1).unsqueeze(dim=-1)
+        x = self.elmo_feature_extractor(x)  # OUT: (B x 32 x L x 1)
+        d8_yhat = self.dssp8_classifier(x).squeeze(dim=-1).permute(0, 2, 1)  # OUT: (B x L x 8)
+
+        return d8_yhat
+
+
+class PS4_Mega(nn.Module):
+    def __init__(self, nb_layers=11, l_aux_dim=1024, model_parallel=False,
+                 h_dim=1024, batch_size=1, seq_len=1, dropout=0.0):
+        super(PS4_Mega, self).__init__()
+
+        self.nb_layers = nb_layers
+        self.h_dim = h_dim
+        self.batch_size = batch_size
+        self.seq_len = seq_len
+        self.dropout = dropout
+        self.aux_emb_size_l = l_aux_dim
+        self.input_size = l_aux_dim
+
+        self.args = ArgHolder(emb_dim=self.input_size, dropout=dropout, hdim=h_dim)
+
+        self.nb_tags = SS_CLASSES
+
+        self.model_parallel = model_parallel
+
+        # build actual NN
+        self.__build_model()
+
+    def __build_model(self):
+
+        # design Sequence processing module
+
+        megas = []
+        for i in range(self.nb_layers):
+            mega = MegaEncoderLayer(self.args)
+            megas.append(mega)
+
+        self.seq_unit = MegaSequence(*megas)
+
+        self.dropout_i = nn.Dropout(max(0.0, self.dropout - 0.2))
+
+        # output layer which projects back to tag space
+        out_dim = self.input_size
+
+        self.hidden_to_tag = nn.Linear(out_dim, self.nb_tags, bias=False)
+
+    def init_hidden(self):
+        # the weights are of the form (nb_layers, batch_size, nb_rnn_units)
+        hidden_a = torch.randn(self.nb_rnn_layers, self.batch_size, self.aux_emb_size_l)
+
+        if torch.cuda.is_available():
+            hidden_a = hidden_a.cuda()
+
+        return hidden_a
+
+    def forward(self, r):
+
+        self.seq_len = r.shape[1]
+
+        # residue encoding
+        R = r.view(self.seq_len, self.batch_size, self.aux_emb_size_l)
+
+        X = self.dropout_i(R)
+
+        # Run through MEGA
+        X = self.seq_unit(X, encoder_padding_mask=None)
+        X = X.view(self.batch_size, self.seq_len, self.input_size)
+
+        # run through linear layer
+        X = self.hidden_to_tag(X)
+
+        Y_hat = X
+        return Y_hat
+
+
+class MegaSequence(nn.Sequential):
+    def forward(self, input, **kwargs):
+        for module in self:
+            options = kwargs if isinstance(module, MegaEncoderLayer) else {}
+            input = module(input, **options)
+        return input
+
+
+class ArgHolder(object):
+    def __init__(self, hdim=512, dropout=0.1, emb_dim=1024):
+        super(object, self).__init__()
+
+        self.encoder_embed_dim = emb_dim
+        self.encoder_hidden_dim = hdim
+        self.dropout = dropout
+        self.encoder_ffn_embed_dim = 1024
+        self.ffn_hidden_dim: int = 1024
+        self.encoder_z_dim: int = 128
+        self.encoder_n_dim: int = 16
+        self.activation_fn: str = 'silu'
+        self.attention_activation_fn: str = 'softmax'
+        self.attention_dropout: float = 0.0
+        self.activation_dropout: float = 0.0
+        self.hidden_dropout: float = 0.0
+        self.encoder_chunk_size: int = -1
+        self.truncation_length: int = None
+        self.rel_pos_bias: str = 'simple'
+        self.max_source_positions: int = 2048
+        self.normalization_type: str = 'layernorm'
+        self.normalize_before: bool = False
+        self.feature_dropout: bool = False
+
+
+
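
Note (not part of the commit): a rough usage sketch for PS4_Mega, assuming the mega submodule and the ps4_data package are importable and that SS_CLASSES is the number of secondary-structure classes. Shapes follow forward() above, which expects per-residue embeddings of width l_aux_dim (1024 by default):

import torch
from ps4_models.classifiers import PS4_Mega

# Hypothetical input: one sequence of 50 residues, 1024-dim embeddings (B x L x 1024).
embeddings = torch.randn(1, 50, 1024)

model = PS4_Mega()
model.eval()
with torch.no_grad():
    logits = model(embeddings)       # (1, 50, SS_CLASSES) after the linear head
    ss_pred = logits.argmax(dim=-1)  # per-residue class indices
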
requirements.txt ADDED
@@ -0,0 +1,9 @@
+torch~=1.9.0
+fairseq~=0.9.0
+numpy~=1.21.1
+scikit-learn~=0.24.2
+transformers~=4.26.1
+setuptools~=57.4.0
+pandas~=1.3.2
+wget~=3.2
+gradio~=3.23.0