huseinzol05 committed on
Commit
7757057
1 Parent(s): f6c9873

Upload ConformerEncoder

Browse files
Files changed (3) hide show
  1. config.json +24 -0
  2. conformer.py +66 -0
  3. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ConformerEncoder"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "conformer.ConformerConfig",
7
+ "AutoModel": "conformer.ConformerEncoder"
8
+ },
9
+ "conformer_depthwise_conv_kernel_size": 31,
10
+ "conformer_dropout": 0.0,
11
+ "conformer_ffn_dim": 576,
12
+ "conformer_input_dim": 144,
13
+ "conformer_num_heads": 4,
14
+ "conformer_num_layers": 2,
15
+ "ctc_loss_reduction": "mean",
16
+ "ctc_zero_infinity": true,
17
+ "input_dim": 80,
18
+ "model_type": "conformer",
19
+ "output_dim": 40,
20
+ "pad_token_id": 39,
21
+ "time_reduction_stride": 4,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.35.2"
24
+ }
conformer.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torchaudio.models import Conformer
2
+ from torchaudio.models.rnnt import _TimeReduction
3
+ from transformers import PretrainedConfig, PreTrainedModel
4
+ import torch
5
+ from torch import nn
6
+ from typing import List, Tuple, Optional
7
+
8
+
9
class ConformerConfig(PretrainedConfig):
    """Configuration for ``ConformerEncoder``.

    Every hyper-parameter the encoder reads is declared here with a default,
    so ``ConformerConfig()`` yields a usable configuration. The defaults
    mirror the ``config.json`` published alongside this module. Any additional
    keyword arguments are forwarded to ``PretrainedConfig``.

    Args:
        input_dim: Feature size of each input frame before time reduction.
        output_dim: Number of output classes produced by the final linear layer.
        time_reduction_stride: Stride handed to the time-reduction module.
        conformer_input_dim: Model width of the Conformer stack.
        conformer_ffn_dim: Hidden size of the Conformer feed-forward modules.
        conformer_num_layers: Number of Conformer layers.
        conformer_num_heads: Number of attention heads per Conformer layer.
        conformer_depthwise_conv_kernel_size: Kernel size of the depthwise
            convolution inside each Conformer layer.
        conformer_dropout: Dropout probability used inside the Conformer.
        ctc_loss_reduction: ``reduction`` argument passed to the CTC loss.
        ctc_zero_infinity: ``zero_infinity`` argument passed to the CTC loss.
        pad_token_id: Class index used as the CTC blank symbol.
    """

    model_type = 'conformer'

    def __init__(
        self,
        input_dim: int = 80,
        output_dim: int = 40,
        time_reduction_stride: int = 4,
        conformer_input_dim: int = 144,
        conformer_ffn_dim: int = 576,
        conformer_num_layers: int = 2,
        conformer_num_heads: int = 4,
        conformer_depthwise_conv_kernel_size: int = 31,
        conformer_dropout: float = 0.0,
        ctc_loss_reduction: str = 'mean',
        ctc_zero_infinity: bool = True,
        pad_token_id: int = 39,
        **kwargs,
    ) -> None:
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.time_reduction_stride = time_reduction_stride
        self.conformer_input_dim = conformer_input_dim
        self.conformer_ffn_dim = conformer_ffn_dim
        self.conformer_num_layers = conformer_num_layers
        self.conformer_num_heads = conformer_num_heads
        self.conformer_depthwise_conv_kernel_size = conformer_depthwise_conv_kernel_size
        self.conformer_dropout = conformer_dropout
        self.ctc_loss_reduction = ctc_loss_reduction
        self.ctc_zero_infinity = ctc_zero_infinity
        # pad_token_id is a standard PretrainedConfig field, so let the base
        # class store it together with any remaining keyword arguments.
        super().__init__(pad_token_id=pad_token_id, **kwargs)
11
+
12
+
13
class ConformerEncoder(PreTrainedModel):
    """Conformer encoder with a linear classification head and optional CTC loss.

    The forward pass chains four sub-modules: time reduction, a linear input
    projection, a torchaudio ``Conformer`` stack, and a linear projection to
    ``config.output_dim`` logits. When labels are supplied, a CTC loss is
    computed with ``config.pad_token_id`` as the blank symbol.
    """

    config_class = ConformerConfig

    def __init__(
        self,
        config,
    ) -> None:
        """Build all sub-modules from the hyper-parameters stored on ``config``.

        Args:
            config: Configuration object providing ``time_reduction_stride``,
                ``input_dim``, ``output_dim`` and the ``conformer_*`` settings.
        """
        super().__init__(config)
        self.time_reduction = _TimeReduction(config.time_reduction_stride)
        # The projection consumes input_dim * stride features; presumably the
        # time-reduction module stacks `stride` consecutive frames along the
        # feature axis (see torchaudio's _TimeReduction) - confirm if changed.
        self.input_linear = torch.nn.Linear(
            config.input_dim * config.time_reduction_stride,
            config.conformer_input_dim,
        )
        self.conformer = Conformer(
            num_layers=config.conformer_num_layers,
            input_dim=config.conformer_input_dim,
            ffn_dim=config.conformer_ffn_dim,
            num_heads=config.conformer_num_heads,
            depthwise_conv_kernel_size=config.conformer_depthwise_conv_kernel_size,
            dropout=config.conformer_dropout,
            use_group_norm=True,
            convolution_first=True,
        )
        self.output_linear = torch.nn.Linear(config.conformer_input_dim, config.output_dim)

    def forward(
        self,
        inputs: torch.Tensor,
        lengths: torch.Tensor,
        labels: Optional[torch.Tensor] = None,
    ):
        """Encode ``inputs`` and optionally compute the CTC loss.

        Args:
            inputs: Input features; assumed to be laid out as
                (batch, time, ``config.input_dim``) as torchaudio expects.
            lengths: Number of valid frames for each sequence in ``inputs``.
            labels: Optional target class ids, one row per sequence. Negative
                entries are treated as padding and ignored.

        Returns:
            ``(logits, output_lengths)`` when ``labels`` is ``None``, otherwise
            ``(loss, logits, output_lengths)``.

        Raises:
            ValueError: If ``labels`` contains an id that is not smaller than
                ``config.output_dim``.
        """
        time_reduction_out, time_reduction_lengths = self.time_reduction(inputs, lengths)
        input_linear_out = self.input_linear(time_reduction_out)
        x, input_lengths = self.conformer(input_linear_out, time_reduction_lengths)
        logits = self.output_linear(x)

        loss = None
        if labels is not None:
            # Fail early with a readable message: an out-of-range target would
            # otherwise index past the class axis inside the CTC kernel.
            if labels.numel() > 0 and labels.max() >= self.config.output_dim:
                raise ValueError(
                    f'Label values must be < output_dim: {self.config.output_dim}'
                )

            # Negative label entries mark padding; drop them and derive the
            # per-sequence target lengths from what remains.
            labels_mask = labels >= 0
            target_lengths = labels_mask.sum(-1)
            flattened_targets = labels.masked_select(labels_mask)

            # Log-softmax is taken in float32, then the first two axes are
            # swapped because ctc_loss expects time-major log-probabilities.
            log_probs = nn.functional.log_softmax(
                logits,
                dim=-1,
                dtype=torch.float32,
            ).transpose(0, 1)

            # cuDNN is disabled for this call so the native CTC
            # implementation is used.
            with torch.backends.cudnn.flags(enabled=False):
                loss = nn.functional.ctc_loss(
                    log_probs,
                    flattened_targets,
                    input_lengths,
                    target_lengths,
                    blank=self.config.pad_token_id,
                    reduction=self.config.ctc_loss_reduction,
                    zero_infinity=self.config.ctc_zero_infinity,
                )

        output = (logits, input_lengths)
        return ((loss,) + output) if loss is not None else output
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce315d1b3778e7a6121c36cf3130fb22dff0e930d1353f0073b80945aabd9fea
3
+ size 4101424