Upload BertForSequenceClassification

Browse files

Files changed (3) hide show

config.json +6 -1
modeling_bert.py +149 -5
pytorch_model.bin +1 -1

config.json CHANGED Viewed

@@ -1,9 +1,13 @@
 {
   "affine": false,
   "alpha": 1,
   "attention_probs_dropout_prob": 0.1,
   "auto_map": {
-    "AutoConfig": "configuration_bert.BertConfig"
   },
   "center": false,
   "classifier_dropout": null,
@@ -27,6 +31,7 @@
   "r": 1,
   "return_mean": true,
   "return_std": true,
   "transformers_version": "4.33.3",
   "type_vocab_size": 2,
   "use_cache": true,

 {
   "affine": false,
   "alpha": 1,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
   "attention_probs_dropout_prob": 0.1,
   "auto_map": {
+    "AutoConfig": "configuration_bert.BertConfig",
+    "AutoModelForSequenceClassification": "modeling_bert.BertForSequenceClassification"
   },
   "center": false,
   "classifier_dropout": null,
   "r": 1,
   "return_mean": true,
   "return_std": true,
+  "torch_dtype": "float32",
   "transformers_version": "4.33.3",
   "type_vocab_size": 2,
   "use_cache": true,

modeling_bert.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import torch
 import torch.nn as nn
 from typing import Optional, List, Union, Tuple
 from transformers import (
     PretrainedConfig,
@@ -46,21 +48,163 @@ class BertPreTrainedModel(PreTrainedModel):
             module.weight.data.fill_(1.0)
 class BertPooler(nn.Module):
     def __init__(self, config):
         super().__init__()
-        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = nn.Tanh()
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         # We "pool" the model by simply taking the hidden state corresponding
         # to the first token.
-        first_token_tensor = hidden_states[:, 0]
-        pooled_output = self.dense(first_token_tensor)
         pooled_output = self.activation(pooled_output)
         return pooled_output
 class BertModel(BertPreTrainedModel):
@@ -180,7 +324,7 @@ class BertModel(BertPreTrainedModel):
             return_dict=return_dict,
         )
         sequence_output = encoder_outputs[0]
-        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
         if not return_dict:
             return (sequence_output, pooled_output) + encoder_outputs[1:]

 import torch
 import torch.nn as nn
+import torch.nn.functional as F
+from collections import OrderedDict
 from typing import Optional, List, Union, Tuple
 from transformers import (
     PretrainedConfig,
             module.weight.data.fill_(1.0)
+class PFSA(nn.Module):
+    """Parameter-free self-attention re-weighting of token features.
+
+    Reference: https://openreview.net/pdf?id=isodM5jTA7h
+
+    Every token vector is compared with the mean token vector of its own
+    sequence through a correlation coefficient taken over the feature axis,
+    and the token is then scaled by ``(1 - sigmoid(corr)) ** alpha``.
+    The module holds no learnable parameters.
+
+    Args:
+        input_dim: Feature size of the incoming tokens (stored, not used in
+            the computation).
+        alpha: Exponent applied to the attention weights.
+    """
+    def __init__(self, input_dim, alpha=1):
+        super(PFSA, self).__init__()
+        self.input_dim = input_dim
+        self.alpha = alpha
+    def forward(self, x, mask=None):
+        """Re-weight the tokens of ``x``.
+
+        Args:
+            x: Floating-point tensor of shape [B, T, F].
+            mask: Accepted for interface compatibility; it is not read, so
+                every position of ``x`` contributes to the sequence mean.
+
+        Returns:
+            Tensor of shape [B, T, F] holding the re-weighted tokens.
+        """
+        x = x.transpose(1, 2)[..., None]  # [B, F, T, 1]
+        k = torch.mean(x, dim=[-1, -2], keepdim=True)  # [B, F, 1, 1]
+        # Centre both operands over the feature axis once and reuse them.
+        x_centered = x - x.mean(dim=1, keepdim=True)
+        k_centered = k - k.mean(dim=1, keepdim=True)
+        kd = torch.sqrt(k_centered.pow(2).sum(dim=1, keepdim=True))  # [B, 1, 1, 1]
+        qd = torch.sqrt(x_centered.pow(2).sum(dim=1, keepdim=True))  # [B, 1, T, 1]
+        # A vector that is constant across features has zero deviation, which
+        # would give 0 / 0 = NaN. The numerator is 0 in that case as well, so
+        # clamping the denominator yields a correlation of 0 instead.
+        denom = (qd * kd).clamp_min(torch.finfo(x.dtype).tiny)
+        C_qk = (x_centered * k_centered).sum(dim=1, keepdim=True) / denom
+        A = (1 - torch.sigmoid(C_qk)) ** self.alpha
+        out = x * A
+        out = out.squeeze(dim=-1).transpose(1, 2)
+        return out
+class PURE(nn.Module):
+    """Principal-component removal followed by PFSA token re-weighting.
+
+    Args:
+        in_dim: Feature size F of the input.
+        q: Target rank passed to ``torch.pca_lowrank``.
+        r: Number of leading principal components to remove.
+        center: Forwarded to ``torch.pca_lowrank`` as ``center``.
+        num_iters: Forwarded to ``torch.pca_lowrank`` as ``niter``.
+        return_mean: Only read by ``get_out_dim``.
+        return_std: Only read by ``get_out_dim``.
+        normalize: If true, apply LeakyReLU + BatchNorm1d to the input and
+            L2-normalise the output over the feature axis.
+        do_pcr: Enable the principal-component removal step.
+        do_pfsa: Enable the PFSA re-weighting step.
+        alpha: Exponent forwarded to ``PFSA``.
+        *args, **kwargs: Accepted and ignored.
+    """
+    def __init__(
+        self,
+        in_dim,
+        q=5,
+        r=1,
+        center=False,
+        num_iters=1,
+        return_mean=True,
+        return_std=True,
+        normalize=False,
+        do_pcr=True,
+        do_pfsa=True,
+        alpha=1,
+        *args, **kwargs
+    ):
+        super().__init__()
+        self.in_dim = in_dim
+        self.target_rank = q
+        self.num_pc_to_remove = r
+        self.center = center
+        self.num_iters = num_iters
+        self.return_mean = return_mean
+        self.return_std = return_std
+        self.normalize = normalize
+        self.do_pcr = do_pcr
+        self.do_pfsa = do_pfsa
+        self.attention = PFSA(in_dim, alpha=alpha)
+        self.eps = 1e-5
+        if self.normalize:
+            # The activation must not run in place: ``forward`` may receive a
+            # transposed view of the caller's tensor, and an in-place op
+            # would silently overwrite the caller's data.
+            self.norm = nn.Sequential(OrderedDict([
+                ('relu', nn.LeakyReLU(inplace=False)),
+                ('bn', nn.BatchNorm1d(in_dim)),
+            ]))
+    def get_out_dim(self):
+        """Return ``2 * in_dim`` if both ``return_mean`` and ``return_std`` are set, else ``in_dim``.
+
+        The value is also stored on ``self.out_dim``.
+        """
+        if self.return_mean and self.return_std:
+            self.out_dim = self.in_dim * 2
+        else:
+            self.out_dim = self.in_dim
+        return self.out_dim
+    def _compute_pc(self, x):
+        """Return the leading principal directions of ``x``.
+
+        x: (B, T, F)
+        Returns pc: [B, K, F] with K at most ``num_pc_to_remove``.
+        """
+        # pca_lowrank rejects q > min(T, F); clamp so that sequences shorter
+        # than the configured rank do not raise.
+        rank = min(self.target_rank, x.shape[-2], x.shape[-1])
+        _, _, V = torch.pca_lowrank(x, q=rank, center=self.center, niter=self.num_iters)
+        pc = V.transpose(1, 2)[:, :self.num_pc_to_remove, :] # pc: [B, K, F]
+        return pc
+    def forward(self, x, attention_mask=None, *args, **kwargs):
+        """
+        PCR -> Attention
+        x: (B, F, T)
+        attention_mask: forwarded unchanged to the attention module.
+        Returns a tensor of shape [B, T, F].
+        """
+        if self.normalize:
+            x = self.norm(x)
+        xt = x.transpose(1, 2)
+        if self.do_pcr:
+            pc = self._compute_pc(xt) # pc: [B, K, F]
+            # Subtract the projection of every token onto the removed components.
+            xx = xt - xt @ pc.transpose(1, 2) @ pc # [B, T, F] * [B, F, K] * [B, K, F] = [B, T, F]
+        else:
+            xx = xt
+        if self.do_pfsa:
+            xx = self.attention(xx, attention_mask)
+        if self.normalize:
+            xx = F.normalize(xx, p=2, dim=2)
+        return xx
 class BertPooler(nn.Module):
+    """Pool token states into one vector: PURE -> masked mean -> dense -> tanh.
+
+    Args:
+        config: Model config. The attributes read here are ``hidden_size``,
+            ``q``, ``r``, ``center``, ``num_iters``, ``return_mean``,
+            ``return_std``, ``normalize``, ``do_pcr``, ``do_pfsa``, ``alpha``
+            and ``affine``.
+    """
     def __init__(self, config):
         super().__init__()
+        self.pure = PURE(
+            config.hidden_size,
+            q=config.q,
+            r=config.r,
+            center=config.center,
+            num_iters=config.num_iters,
+            return_mean=config.return_mean,
+            return_std=config.return_std,
+            normalize=config.normalize,
+            do_pcr=config.do_pcr,
+            do_pfsa=config.do_pfsa,
+            alpha=config.alpha
+        )
+        # The learned projection is optional; without it the pooled mean goes
+        # straight to the activation.
+        if config.affine:
+            self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        else:
+            self.dense = nn.Identity()
         self.activation = nn.Tanh()
+        self.eps = 1e-5
+    def forward(self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
+        """Return the pooled representation of ``hidden_states``.
+
+        Args:
+            hidden_states: Token states of shape [B, T, H].
+            attention_mask: [B, T] mask, non-zero for tokens that take part
+                in the mean. ``None`` treats every token as valid.
+
+        Returns:
+            Tensor of shape [B, H].
+        """
+        if attention_mask is None:
+            attention_mask = torch.ones(
+                hidden_states.shape[:2], dtype=torch.long, device=hidden_states.device
+            )
+        # Post-process the tokens with PURE, then average them under the mask.
+        hidden_states = self.pure(hidden_states.transpose(1, 2), attention_mask)
+        mean_tensor = self.mean_pooling(hidden_states, attention_mask)
+        pooled_output = self.dense(mean_tensor)
         pooled_output = self.activation(pooled_output)
         return pooled_output
+    def _get_gauss_noise(self, shape_of_tensor, device="cpu"):
+        """Return noise in ``[self.eps, 9 * self.eps]`` derived from a Gaussian draw.
+
+        Arguments
+        ---------
+        shape_of_tensor : torch.Size or sequence of int
+            Shape of the noise tensor to generate.
+        device : str or torch.device
+            Device on which the noise is created.
+        """
+        gnoise = torch.randn(shape_of_tensor, device=device)
+        # Min-max scale the draw to [0, 1].
+        gnoise -= torch.min(gnoise)
+        # After the shift the maximum is 0 for a single-element (or constant)
+        # draw; clamp the divisor so the result is 0 rather than NaN.
+        gnoise /= torch.max(gnoise).clamp_min(torch.finfo(gnoise.dtype).tiny)
+        # Map [0, 1] linearly onto [9, 1] and scale by eps.
+        gnoise = self.eps * ((1 - 9) * gnoise + 9)
+        return gnoise
+    def add_noise(self, tensor):
+        """Add noise from ``_get_gauss_noise`` to ``tensor`` in place and return it."""
+        gnoise = self._get_gauss_noise(tensor.size(), device=tensor.device)
+        tensor += gnoise
+        return tensor
+    def mean_pooling(self, token_embeddings, attention_mask):
+        """Average ``token_embeddings`` [B, T, H] over T using ``attention_mask`` [B, T] as weights."""
+        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+        # The clamp keeps the division finite when a row of the mask is all zeros.
+        mean = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+        # Noise injection via self.add_noise is currently disabled.
+        return mean
 class BertModel(BertPreTrainedModel):
             return_dict=return_dict,
         )
         sequence_output = encoder_outputs[0]
+        pooled_output = self.pooler(sequence_output, attention_mask) if self.pooler is not None else None
         if not return_dict:
             return (sequence_output, pooled_output) + encoder_outputs[1:]

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31b5cff2bb6cce0d41eceb729d8660438d177910122749eca6916b3f404c0f80
 size 438000689

 version https://git-lfs.github.com/spec/v1
+oid sha256:64dd3354da4b868afe78cc83d9e51ed4ca20cab88015a22a38257b205c9eadd4
 size 438000689