upload better model
Browse files
- .gitattributes +2 -0
- README.md +1 -1
- asr.ckpt +2 -2
- hyperparams.yaml +12 -12
- lm.ckpt +2 -2
- normalizer.ckpt +2 -2
- tokenizer.ckpt +2 -2
.gitattributes
CHANGED
@@ -29,3 +29,5 @@ asr.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
29 |
lm.ckpt filter=lfs diff=lfs merge=lfs -text
|
30 |
normalizer.ckpt filter=lfs diff=lfs merge=lfs -text
|
31 |
tokenizer.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
29 |
lm.ckpt filter=lfs diff=lfs merge=lfs -text
|
30 |
normalizer.ckpt filter=lfs diff=lfs merge=lfs -text
|
31 |
tokenizer.ckpt filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.ckpt. filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -30,7 +30,7 @@ The performance of the model is the following:
|
|
30 |
|
31 |
| Release | eval clean CER | eval other CER | GPUs |
|
32 |
| :------: | :------------: | :------------: | :---------: |
|
33 |
-
|
|
34 |
|
35 |
## Pipeline description
|
36 |
|
|
|
30 |
|
31 |
| Release | eval clean CER | eval other CER | GPUs |
|
32 |
| :------: | :------------: | :------------: | :---------: |
|
33 |
+
| 22-07-23 | 7.33% | 7.99% | 6xA100 80GB |
|
34 |
|
35 |
## Pipeline description
|
36 |
|
asr.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e718dc29b403dfaa8d2604c43c3666be3fa99e958b77e3c6ff387e94d4a174c
|
3 |
+
size 184546287
|
hyperparams.yaml
CHANGED
@@ -22,14 +22,14 @@ nhead: 4
|
|
22 |
num_encoder_layers: 12
|
23 |
num_decoder_layers: 6
|
24 |
d_ffn: 2048
|
25 |
-
transformer_dropout: 0.
|
26 |
activation: !name:torch.nn.GELU
|
27 |
output_neurons: 5000
|
28 |
vocab_size: 5000
|
29 |
|
30 |
# Outputs
|
31 |
blank_index: 0
|
32 |
-
label_smoothing: 0.
|
33 |
pad_index: 0
|
34 |
bos_index: 1
|
35 |
eos_index: 2
|
@@ -40,9 +40,9 @@ min_decode_ratio: 0.0
|
|
40 |
max_decode_ratio: 1.0
|
41 |
valid_search_interval: 10
|
42 |
valid_beam_size: 10
|
43 |
-
test_beam_size:
|
44 |
-
lm_weight: 0.
|
45 |
-
ctc_weight_decode: 0.
|
46 |
|
47 |
############################## models ################################
|
48 |
|
@@ -51,12 +51,12 @@ normalizer: !new:speechbrain.processing.features.InputNormalization
|
|
51 |
|
52 |
CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
|
53 |
input_shape: (8, 10, 80)
|
54 |
-
num_blocks:
|
55 |
num_layers_per_block: 1
|
56 |
-
out_channels: (64,
|
57 |
-
kernel_sizes: (
|
58 |
-
strides: (2, 2)
|
59 |
-
residuals: (False, False)
|
60 |
|
61 |
Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
|
62 |
input_size: 640
|
@@ -106,8 +106,8 @@ decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
|
|
106 |
ctc_weight: !ref <ctc_weight_decode>
|
107 |
lm_weight: !ref <lm_weight>
|
108 |
lm_modules: !ref <lm_model>
|
109 |
-
temperature: 1.
|
110 |
-
temperature_lm: 1.
|
111 |
using_eos_threshold: False
|
112 |
length_normalization: True
|
113 |
|
|
|
22 |
num_encoder_layers: 12
|
23 |
num_decoder_layers: 6
|
24 |
d_ffn: 2048
|
25 |
+
transformer_dropout: 0.1
|
26 |
activation: !name:torch.nn.GELU
|
27 |
output_neurons: 5000
|
28 |
vocab_size: 5000
|
29 |
|
30 |
# Outputs
|
31 |
blank_index: 0
|
32 |
+
label_smoothing: 0.0
|
33 |
pad_index: 0
|
34 |
bos_index: 1
|
35 |
eos_index: 2
|
|
|
40 |
max_decode_ratio: 1.0
|
41 |
valid_search_interval: 10
|
42 |
valid_beam_size: 10
|
43 |
+
test_beam_size: 66
|
44 |
+
lm_weight: 0.60
|
45 |
+
ctc_weight_decode: 0.50
|
46 |
|
47 |
############################## models ################################
|
48 |
|
|
|
51 |
|
52 |
CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
|
53 |
input_shape: (8, 10, 80)
|
54 |
+
num_blocks: 3
|
55 |
num_layers_per_block: 1
|
56 |
+
out_channels: (64, 64, 64)
|
57 |
+
kernel_sizes: (5, 5, 1)
|
58 |
+
strides: (2, 2, 1)
|
59 |
+
residuals: (False, False, True)
|
60 |
|
61 |
Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
|
62 |
input_size: 640
|
|
|
106 |
ctc_weight: !ref <ctc_weight_decode>
|
107 |
lm_weight: !ref <lm_weight>
|
108 |
lm_modules: !ref <lm_model>
|
109 |
+
temperature: 1.15
|
110 |
+
temperature_lm: 1.15
|
111 |
using_eos_threshold: False
|
112 |
length_normalization: True
|
113 |
|
lm.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f0b49d5e1f9894c0c9f2ec21c8658da8e1a07f509b807e8624450ba19ea667c
|
3 |
+
size 381072461
|
normalizer.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1da2ced935d955c014177591249e5db497d0c5dc7143e64378da0cb5590fe77a
|
3 |
+
size 1703
|
tokenizer.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d419e55734c26df6c5690671be2b887a7db389c1a7f63286111ce737508c6569
|
3 |
+
size 313900
|