Leotrim committed on
Commit 05af260
1 Parent(s): ad7e92b

End of training

Files changed (5)
  1. README.md +23 -22
  2. config.json +11 -9
  3. generation_config.json +10 -26
  4. model.safetensors +2 -2
  5. training_args.bin +1 -1
README.md CHANGED
@@ -1,42 +1,40 @@
 ---
-language:
-- dv
 license: apache-2.0
-base_model: openai/whisper-small
+base_model: openai/whisper-tiny
 tags:
 - generated_from_trainer
 datasets:
-- mozilla-foundation/common_voice_13_0
+- PolyAI/minds14
 metrics:
 - wer
 model-index:
-- name: Whisper-Small-Dv-fine-tuned
+- name: whisper-small-dv
   results:
   - task:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
-      name: Common Voice 13
-      type: mozilla-foundation/common_voice_13_0
-      config: dv
-      split: test
-      args: dv
+      name: PolyAI/minds14
+      type: PolyAI/minds14
+      config: en-US
+      split: train
+      args: en-US
     metrics:
     - name: Wer
       type: wer
-      value: 12.621274820043816
+      value: 35.6091030789826
 ---

 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->

-# Whisper-Small-Dv-fine-tuned
+# whisper-small-dv

-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 13 dataset.
+This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the PolyAI/minds14 dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1648
-- Wer Ortho: 60.1574
-- Wer: 12.6213
+- Loss: 0.7160
+- Wer Ortho: 36.2369
+- Wer: 35.6091

 ## Model description

@@ -56,20 +54,23 @@ More information needed

 The following hyperparameters were used during training:
 - learning_rate: 1e-05
-- train_batch_size: 32
-- eval_batch_size: 32
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant_with_warmup
 - lr_scheduler_warmup_steps: 50
-- training_steps: 500
+- training_steps: 200
 - mixed_precision_training: Native AMP

 ### Training results

-| Training Loss | Epoch | Step | Validation Loss | Wer Ortho | Wer |
-|:-------------:|:------:|:----:|:---------------:|:---------:|:-------:|
-| 0.0698 | 3.2468 | 500 | 0.1648 | 60.1574 | 12.6213 |
+| Training Loss | Epoch | Step | Validation Loss | Wer Ortho | Wer |
+|:-------------:|:-------:|:----:|:---------------:|:---------:|:-------:|
+| 0.2296 | 7.1429 | 100 | 0.5760 | 34.1463 | 33.7349 |
+| 0.0048 | 14.2857 | 200 | 0.7160 | 36.2369 | 35.6091 |


 ### Framework versions
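The updated card describes a whisper-tiny checkpoint fine-tuned on the en-US config of PolyAI/minds14 and evaluated with orthographic and normalized WER. Below is a minimal sketch of how such a checkpoint could be loaded and spot-checked; the repo id `Leotrim/whisper-small-dv` is an assumption inferred from the committer and the model-index name, and lower-casing is used only as a stand-in for the card's normalized WER.

```python
# Minimal inference / WER spot-check sketch for the card above.
# Assumptions: repo id "Leotrim/whisper-small-dv" (committer + model-index name);
# lower-casing approximates the normalization behind the card's "Wer" metric.
from datasets import Audio, load_dataset
from transformers import pipeline
import evaluate

asr = pipeline("automatic-speech-recognition", model="Leotrim/whisper-small-dv")

# MINDS-14 en-US, the dataset referenced in the model-index metadata (it only ships a train split).
ds = load_dataset("PolyAI/minds14", name="en-US", split="train")
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))  # Whisper expects 16 kHz input

wer = evaluate.load("wer")
sample = ds[0]
prediction = asr(sample["audio"]["array"])["text"]
print(prediction)
print(wer.compute(predictions=[prediction.lower()],
                  references=[sample["transcription"].lower()]))
```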
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "openai/whisper-small",
+  "_name_or_path": "openai/whisper-tiny",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,
@@ -13,17 +13,17 @@
   ],
   "bos_token_id": 50257,
   "classifier_proj_size": 256,
-  "d_model": 768,
-  "decoder_attention_heads": 12,
-  "decoder_ffn_dim": 3072,
+  "d_model": 384,
+  "decoder_attention_heads": 6,
+  "decoder_ffn_dim": 1536,
   "decoder_layerdrop": 0.0,
-  "decoder_layers": 12,
+  "decoder_layers": 4,
   "decoder_start_token_id": 50258,
   "dropout": 0.0,
-  "encoder_attention_heads": 12,
-  "encoder_ffn_dim": 3072,
+  "encoder_attention_heads": 6,
+  "encoder_ffn_dim": 1536,
   "encoder_layerdrop": 0.0,
-  "encoder_layers": 12,
+  "encoder_layers": 4,
   "eos_token_id": 50257,
   "forced_decoder_ids": [
     [
@@ -52,7 +52,7 @@
   "max_target_positions": 448,
   "median_filter_width": 7,
   "model_type": "whisper",
-  "num_hidden_layers": 12,
+  "num_hidden_layers": 4,
   "num_mel_bins": 80,
   "pad_token_id": 50257,
   "scale_embedding": false,
@@ -140,6 +140,8 @@
     49870,
     50254,
     50258,
+    50358,
+    50359,
     50360,
     50361,
     50362
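The config.json changes above are exactly the architecture fields that differ between the two base checkpoints. A quick way to confirm this is to compare their configs directly; this sketch downloads only the small config files, no weights.

```python
# Sketch: compare the two base checkpoints' configs to confirm the fields
# replaced in config.json above.
from transformers import WhisperConfig

small = WhisperConfig.from_pretrained("openai/whisper-small")
tiny = WhisperConfig.from_pretrained("openai/whisper-tiny")

for key in ("d_model", "encoder_layers", "decoder_layers",
            "encoder_attention_heads", "decoder_attention_heads",
            "encoder_ffn_dim", "decoder_ffn_dim"):
    print(f"{key}: {getattr(small, key)} -> {getattr(tiny, key)}")
# Expected: 768 -> 384, 12 -> 4 layers, 12 -> 6 heads, 3072 -> 1536 FFN,
# matching the replacements in the diff.
```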
generation_config.json CHANGED
@@ -1,43 +1,27 @@
 {
   "alignment_heads": [
     [
-      5,
-      3
-    ],
-    [
-      5,
-      9
+      2,
+      2
     ],
     [
-      8,
+      3,
       0
     ],
     [
-      8,
-      4
-    ],
-    [
-      8,
-      7
+      3,
+      2
     ],
     [
-      8,
-      8
-    ],
-    [
-      9,
-      0
-    ],
-    [
-      9,
-      7
+      3,
+      3
     ],
     [
-      9,
-      9
+      3,
+      4
     ],
     [
-      10,
+      3,
       5
     ]
   ],
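The `alignment_heads` list names the decoder cross-attention heads that transformers uses for word-level timestamp alignment, so it has to match the new architecture: with 4 decoder layers of 6 heads each, every `[layer, head]` pair now stays within `[3, 5]`. A hedged usage sketch (same assumed repo id as above; `sample.wav` stands for any mono audio file):

```python
# Sketch: word-level timestamps rely on generation_config.alignment_heads.
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="Leotrim/whisper-small-dv")  # assumed repo id
out = asr("sample.wav", return_timestamps="word")
print(out["chunks"])  # [{'text': ..., 'timestamp': (start, end)}, ...]
```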
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:927fbd1f5fd870a3b83268986757002a921320d684c64a212def802a11cacdb1
-size 966995080
+oid sha256:e402f541632d00e2956790750390c78a6094c78d742b62b91187f976d0862c2e
+size 151061672
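The new LFS pointer is consistent with the base-model swap: roughly 151 MB of fp32 weights corresponds to about 37.8M parameters (whisper-tiny), versus roughly 967 MB for the previous whisper-small checkpoint (~242M parameters). A quick sanity-check sketch:

```python
# Sketch: relate the safetensors size to the parameter count of the
# whisper-tiny-based model (fp32 storage, 4 bytes per parameter).
from transformers import WhisperForConditionalGeneration

model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
n_params = sum(p.numel() for p in model.parameters())
print(n_params, n_params * 4)  # ~37.8M parameters, ~151 MB of tensor data
# 151061672 bytes / 4 ≈ 37.77M parameters; the old 966995080-byte file
# matches whisper-small's ~242M parameters.
```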
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:161cad3ce9212d9981716768b0f7cf7d7baeded183afbddebf680b12dd26e8ad
+oid sha256:a58d979b65d06f10ff7201a06c65afc6270fd97fc5e055e99a8af7a35d67c48f
 size 5304
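`training_args.bin` is the pickled training-arguments object the Trainer saves alongside the weights (here presumably `Seq2SeqTrainingArguments`); loading it should reproduce the hyperparameters listed in the README. A sketch, assuming the file has been downloaded locally; newer torch versions need `weights_only=False` to unpickle non-tensor objects.

```python
# Sketch: inspect the saved training arguments; the values should match the
# README's hyperparameter list (lr 1e-5, per-device batch 8, grad accumulation 4,
# 200 steps, constant_with_warmup with 50 warmup steps; "Native AMP" usually
# corresponds to fp16=True).
import torch

args = torch.load("training_args.bin", weights_only=False)  # pickled object, not tensors
print(type(args).__name__)
print(args.learning_rate, args.per_device_train_batch_size,
      args.gradient_accumulation_steps, args.max_steps,
      args.lr_scheduler_type, args.warmup_steps, args.fp16)
```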