import torch.nn as nn
import torch
from transformers import BertModel, BertConfig, PreTrainedModel

def get_device():
    """Return the CUDA device if available, otherwise the CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

# Resolve the device once at import time; init_hidden() reuses USE_CUDA.
USE_CUDA = False
device = get_device()
if device.type == 'cuda':
    USE_CUDA = True

base_bert = 'indobenchmark/indobert-base-p2'
HIDDEN_DIM = 768
OUTPUT_DIM = 2  # number of classes (2 for binary classification)
BIDIRECTIONAL = True
DROPOUT = 0.2

class IndoBERTBiLSTM(PreTrainedModel):
    """IndoBERT encoder followed by a BiLSTM and a linear classification head."""
    config_class = BertConfig

    def __init__(self, bert_config):
        super().__init__(bert_config)
        self.output_dim = OUTPUT_DIM
        self.n_layers = 1
        self.hidden_dim = HIDDEN_DIM
        self.bidirectional = BIDIRECTIONAL

        # Pretrained IndoBERT encoder.
        self.bert = BertModel.from_pretrained(base_bert)
        # (Bi)LSTM over the BERT token embeddings.
        self.lstm = nn.LSTM(input_size=self.bert.config.hidden_size,
                            hidden_size=self.hidden_dim,
                            num_layers=self.n_layers,
                            bidirectional=self.bidirectional,
                            batch_first=True)
        self.dropout = nn.Dropout(DROPOUT)
        # Bidirectional output is the concatenation of both directions.
        self.output_layer = nn.Linear(self.hidden_dim * 2 if self.bidirectional else self.hidden_dim,
                                      self.output_dim)

    def forward(self, input_ids, attention_mask):
        # Fresh zero hidden/cell states for every batch.
        hidden = self.init_hidden(input_ids.shape[0])

        # Contextual token embeddings from IndoBERT: [batch, seq_len, 768].
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = output.last_hidden_state

        # Run the (Bi)LSTM over the BERT token embeddings.
        lstm_output, (hidden_last, cn_last) = self.lstm(sequence_output, hidden)

        if self.bidirectional:
            # Concatenate the final forward and backward hidden states:
            # [batch, hidden_dim * 2]
            hidden_last_out = torch.cat([hidden_last[-2], hidden_last[-1]], dim=-1)
        else:
            hidden_last_out = hidden_last[-1]  # [batch, hidden_dim]

        # Dropout followed by the linear classification head.
        out = self.dropout(hidden_last_out)
        logits = self.output_layer(out)

        return logits

    def init_hidden(self, batch_size):
        """Create zero-initialized (h_0, c_0) states for the LSTM."""
        weight = next(self.parameters()).data

        # A bidirectional LSTM needs one state per direction.
        number = 2 if self.bidirectional else 1

        shape = (self.n_layers * number, batch_size, self.hidden_dim)
        if USE_CUDA:
            hidden = (weight.new(*shape).zero_().float().cuda(),
                      weight.new(*shape).zero_().float().cuda())
        else:
            hidden = (weight.new(*shape).zero_().float(),
                      weight.new(*shape).zero_().float())

        return hidden
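

# --- Usage sketch (illustrative; not part of the original model code) ---
# A minimal example of a single forward pass, assuming the tokenizer that
# ships with `base_bert`. The sample sentence and batch size are
# hypothetical and only meant to show the expected input/output shapes.
if __name__ == '__main__':
    from transformers import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained(base_bert)
    config = BertConfig.from_pretrained(base_bert)
    model = IndoBERTBiLSTM(config).to(device)
    model.eval()

    encoded = tokenizer(
        ['contoh kalimat untuk klasifikasi'],  # hypothetical sample text
        padding=True,
        truncation=True,
        return_tensors='pt',
    )
    with torch.no_grad():
        logits = model(encoded['input_ids'].to(device),
                       encoded['attention_mask'].to(device))
    # logits: [batch_size, OUTPUT_DIM]; argmax over the last dim gives the class.
    print(logits.shape, logits.argmax(dim=-1))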