geetu040 committed on
Commit
d08668b
·
1 Parent(s): 2b4a01c

Initial Upload

Browse files
Files changed (8) hide show
  1. Dockerfile +14 -0
  2. dumps/model.pt +3 -0
  3. dumps/params.json +1 -0
  4. dumps/vocab.pt +3 -0
  5. main.py +31 -0
  6. requirements.txt +6 -0
  7. utils/model.py +106 -0
  8. utils/preprocess.py +19 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ WORKDIR /code
7
+
8
+ COPY ./requirements.txt /code/requirements.txt
9
+
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ COPY . .
13
+
14
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
dumps/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d5d911575429b382c886c2b9764ba3226128fe1d1368ec77bbcca6925014db1
3
+ size 4465302
dumps/params.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [8000, 128, 0, 64, 1, 2, [64, 64], 0.4]
dumps/vocab.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75ea0d5a228a81d16d25cda6f207ce443f1469431497cdcf914384d2e642907b
3
+ size 131115
main.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ======= PREPARING THE PIPELINE =======
2
+
3
+ import torch
4
+ import os
5
+ from utils.preprocess import get_preprocess
6
+ from utils.model import get_model
7
+
8
# Serialized artifacts live under ./dumps/, resolved against the current
# working directory.
dump_path = "./dumps/"
vocab_path, model_path, params_path = (
    os.path.join(dump_path, artifact)
    for artifact in ("vocab.pt", "model.pt", "params.json")
)

# Build the text -> token-id callable and the classifier once, at import time,
# so every request reuses the same objects.
preprocess = get_preprocess(vocab_path)
model = get_model(model_path, params_path)
15
+
16
def predict(text):
    """Run the loaded classifier on a single piece of text.

    Args:
        text: Raw input string; it is passed through the module-level
            ``preprocess`` callable before reaching the model.

    Returns:
        The first (and only) element of the model's batched output,
        converted to a plain Python value via ``tolist()``.

    Raises:
        ValueError: If preprocessing produces no token ids (for example an
            empty or punctuation-only string), since there would be nothing
            to feed to the model.
    """
    token_ids = preprocess(text)
    if len(token_ids) == 0:
        raise ValueError(
            "text must contain at least one token after preprocessing"
        )

    # Wrap in a list to add the batch dimension; the explicit integer dtype
    # keeps the tensor usable as embedding indices.
    batch = torch.tensor([token_ids], dtype=torch.long)

    # Pure inference: no need to record operations for autograd.
    with torch.no_grad():
        scores = model(batch)

    return scores.detach().numpy().tolist()[0]
22
+
23
+ # ======= CREATING APP =======
24
+
25
+ from fastapi import FastAPI
26
+
27
app = FastAPI()


@app.get("/")
def main(text: str):
    """Handle ``GET /`` by returning the prediction for ``text``."""
    prediction = predict(text)
    return prediction
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ torch
4
+ # json is part of the Python standard library; it is not installed via pip
5
+ torchtext
6
+ # re is part of the Python standard library; it is not installed via pip
utils/model.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import json
4
+
5
def attention(Q, K, V):
    """Compute scaled dot-product attention.

    Evaluates ``softmax(Q @ K^T / sqrt(d)) @ V`` where ``d`` is the size of
    the last dimension of ``K`` and the softmax runs over the last axis.

    Args:
        Q: Query tensor.
        K: Key tensor; its last two dimensions are swapped before the product.
        V: Value tensor.

    Returns:
        The attention-weighted combination of ``V``.
    """
    scale = K.shape[-1] ** 0.5
    scores = torch.matmul(Q, K.transpose(-2, -1)) / scale
    return torch.matmul(torch.softmax(scores, dim=-1), V)
12
+
13
class Attention(torch.nn.Module):
    """Parameter-free self-attention layer over a 3-D input.

    The input is transposed so that ``attention`` sees tensors shaped
    (batch_size, emb_dim, seq_len), and the result is transposed back to
    (batch_size, seq_len, emb_dim).

    NOTE(review): ``n_heads`` is only checked for divisibility of ``emb_dim``;
    the tensor is never split into per-head chunks. Confirm this is intended
    before changing it, since any saved weights were produced with this
    exact computation.

    Args:
        emb_dim: Expected size of the last input dimension.
        n_heads: Head count; must divide ``emb_dim`` evenly.
    """

    def __init__(self, emb_dim, n_heads):
        super().__init__()

        self.emb_dim = emb_dim
        self.n_heads = n_heads

    def forward(self, X):
        """Apply self-attention to ``X`` of shape (batch_size, seq_len, emb_dim)."""
        batch_size, seq_len, emb_dim = X.size()
        per_head = emb_dim // self.n_heads

        assert emb_dim == self.emb_dim
        assert per_head * self.n_heads == emb_dim

        swapped = X.transpose(1, 2)                  # (batch_size, emb_dim, seq_len)
        attended = attention(swapped, swapped, swapped)  # (batch_size, emb_dim, seq_len)
        restored = attended.transpose(1, 2).contiguous()  # (batch_size, seq_len, emb_dim)

        return restored.view(batch_size, seq_len, emb_dim)
35
+
36
class ClassifierAttention(nn.Module):
    """Sequence classifier: embedding -> LSTM -> attention -> LSTM -> MLP -> sigmoid.

    Args:
        vocab_size: Number of entries in the embedding table.
        emb_dim: Size of each embedding vector.
        padding_idx: Forwarded to ``nn.Embedding`` as ``padding_idx``.
        hidden_size: Hidden size of both LSTMs and input width of the MLP.
        n_layers: Number of stacked layers in each LSTM.
        attention_heads: Forwarded to ``Attention`` as ``n_heads``.
        hidden_layer_units: Output widths of the fully connected layers that
            precede the final single-unit layer.
        dropout: Dropout probability shared by every dropout application.
    """

    def __init__(self, vocab_size, emb_dim, padding_idx, hidden_size, n_layers, attention_heads, hidden_layer_units, dropout):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=padding_idx)

        self.rnn_1 = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hidden_size,
            num_layers=n_layers,
            bidirectional=False,
            batch_first=True,
        )

        self.attention = Attention(hidden_size, attention_heads)

        self.rnn_2 = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            bidirectional=False,
            batch_first=True,
        )

        self.dropout = nn.Dropout(dropout)

        # Linear -> ReLU -> Dropout per hidden width, then one output unit.
        # The same Dropout instance is reused at each position so the
        # ModuleList indices of the Linear layers stay 0, 3, 6, ...
        widths = [hidden_size, *hidden_layer_units]
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(), self.dropout]
        stack.append(nn.Linear(widths[-1], 1))
        self.hidden_layers = nn.ModuleList(stack)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid score per sequence for ``x`` of shape (batch_size, seq_len)."""
        embedded = self.embedding(x)                  # (batch_size, seq_len, emb_dim)
        sequence, _ = self.rnn_1(embedded)            # (batch_size, seq_len, hidden_size)
        attended = self.dropout(self.attention(sequence))

        # hidden_state: (n_layers, batch_size, hidden_size); keep the last layer.
        _, (hidden_state, _) = self.rnn_2(attended)
        out = self.dropout(hidden_state[-1])          # (batch_size, hidden_size)

        for layer in self.hidden_layers:
            out = layer(out)

        # (batch_size, 1) -> (batch_size)
        return self.sigmoid(out).squeeze(-1)
97
+
98
def get_model(model_path, params_path):
    """Rebuild the classifier from saved hyperparameters and weights.

    Args:
        model_path: Path to the file holding the model's ``state_dict``.
        params_path: Path to a JSON file containing the positional
            constructor arguments for ``ClassifierAttention`` as a list.

    Returns:
        A ``ClassifierAttention`` instance with the weights loaded, switched
        to evaluation mode.
    """
    with open(params_path, 'rb') as params_file:
        params = json.load(params_file)

    model = ClassifierAttention(*params)

    # map_location="cpu" lets a checkpoint that was saved from a GPU load on
    # a host without CUDA; the freshly constructed model is on the CPU anyway.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    # Evaluation mode disables dropout for inference.
    model.eval()

    return model
utils/preprocess.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchtext
3
+ import re
4
+
5
def clean_text(text):
    """Normalize raw text before tokenization.

    Steps, in order: trim leading/trailing whitespace, collapse each run of
    whitespace into a single space, lowercase, then drop every character
    that is neither a word character nor whitespace.

    Args:
        text: Input string.

    Returns:
        The normalized string. Removing punctuation happens last, so a
        punctuation mark surrounded by spaces leaves two adjacent spaces.
    """
    # Remove surrounding whitespace
    text = text.strip()
    # Collapse whitespace runs to a single space (raw string: \s is a regex
    # class, not a Python string escape)
    text = re.sub(r'\s+', ' ', text)
    # Lowercase the text
    text = text.lower()
    # Remove punctuation marks
    text = re.sub(r'[^\w\s]', '', text)
    return text
15
+
16
def get_preprocess(vocab_path):
    """Build the text-to-token-id preprocessing callable.

    Args:
        vocab_path: Path to the serialized vocabulary object.

    Returns:
        A one-argument callable that cleans the text, tokenizes it with the
        ``basic_english`` tokenizer, and passes the tokens to the vocabulary.
    """
    tokenizer = torchtext.data.utils.get_tokenizer('basic_english')
    # NOTE(review): this unpickles a full Python object rather than a plain
    # tensor/state dict; newer PyTorch releases may restrict that by default,
    # so confirm against the torch version actually installed.
    vocab = torch.load(vocab_path)

    def preprocess(text):
        tokens = tokenizer(clean_text(text))
        return vocab(tokens)

    return preprocess