bakrianoo committed on
Commit
ddce2c2
1 Parent(s): e3ce0fb

update the model

Browse files
README.md CHANGED
@@ -22,16 +22,13 @@ model-index:
22
  name: Common Voice ar
23
  args: ar
24
  metrics:
25
- - type: wer # Required. Example: wer
26
- value: 0.18 # Required. Example: 20.90
27
- name: Test WER # Optional. Example: Test WER
28
 
29
- - type: cer # Required. Example: wer
30
- value: 0.051 # Required. Example: 20.90
31
- name: Test CER # Optional. Example: Test WER
32
-
33
- WER: 0.18855042016806722
34
- CER: 0.05138746531806014
35
 
36
  ---
37
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -41,12 +38,16 @@ should probably proofread and complete it, then remove this comment. -->
41
 
42
  # نموذج **صوت سيناء** للتعرف على الأصوات العربية الفصحى و تحويلها إلى نصوص
43
 
44
- This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the common_voice 8 dataset.
 
 
 
45
 
46
  It achieves the following results on the evaluation set:
47
- - Loss: 0.22
48
- - Wer: 0.189
49
- - Cer: 0.051
 
50
 
51
  #### Evaluation Commands
52
  1. To evaluate on `mozilla-foundation/common_voice_8_0` with split `test`
@@ -95,11 +96,41 @@ The following hyperparameters were used during training:
95
  - train_batch_size: 32
96
  - eval_batch_size: 10
97
  - seed: 42
98
- - gradient_accumulation_steps: 2
99
- - total_train_batch_size: 128
 
 
100
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
101
  - lr_scheduler_type: linear
102
  - lr_scheduler_warmup_steps: 1000
103
- - num_epochs: 8.32
104
  - mixed_precision_training: Native AMP
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  name: Common Voice ar
23
  args: ar
24
  metrics:
25
+ - type: wer
26
+ value: 0.181
27
+ name: Test WER
28
 
29
+ - type: cer
30
+ value: 0.049
31
+ name: Test CER
 
 
 
32
 
33
  ---
34
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
38
 
39
  # نموذج **صوت سيناء** للتعرف على الأصوات العربية الفصحى و تحويلها إلى نصوص
40
 
41
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the Arabic (`ar`) subset of the `mozilla-foundation/common_voice_8_0` dataset.
42
+ At the end of training (epoch 10), it achieves the following results on the evaluation set:
43
+ - Loss: 0.2141
44
+ - Wer: 0.1808
45
 
46
  It achieves the following results on the evaluation set:
47
+ - eval_loss = 0.2141
48
+ - eval_samples = 10388
49
+ - eval_wer = 0.181
50
+ - eval_cer = 0.049
51
 
52
  #### Evaluation Commands
53
  1. To evaluate on `mozilla-foundation/common_voice_8_0` with split `test`
 
96
  - train_batch_size: 32
97
  - eval_batch_size: 10
98
  - seed: 42
99
+ - distributed_type: multi-GPU
100
+ - num_devices: 8
101
+ - total_train_batch_size: 256
102
+ - total_eval_batch_size: 80
103
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
104
  - lr_scheduler_type: linear
105
  - lr_scheduler_warmup_steps: 1000
106
+ - num_epochs: 10
107
  - mixed_precision_training: Native AMP
108
 
109
+
110
+ ### Training results
111
+
112
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
113
+ |:-------------:|:-----:|:-----:|:---------------:|:------:|
114
+ | 1.354 | 0.64 | 1000 | 0.4109 | 0.4493 |
115
+ | 0.5886 | 1.28 | 2000 | 0.2798 | 0.3099 |
116
+ | 0.4977 | 1.92 | 3000 | 0.2387 | 0.2673 |
117
+ | 0.4253 | 2.56 | 4000 | 0.2266 | 0.2523 |
118
+ | 0.3942 | 3.2 | 5000 | 0.2171 | 0.2437 |
119
+ | 0.3619 | 3.84 | 6000 | 0.2076 | 0.2253 |
120
+ | 0.3245 | 4.48 | 7000 | 0.2088 | 0.2186 |
121
+ | 0.308 | 5.12 | 8000 | 0.2086 | 0.2206 |
122
+ | 0.2881 | 5.76 | 9000 | 0.2089 | 0.2105 |
123
+ | 0.2557 | 6.4 | 10000 | 0.2015 | 0.2004 |
124
+ | 0.248 | 7.04 | 11000 | 0.2044 | 0.1953 |
125
+ | 0.2251 | 7.68 | 12000 | 0.2058 | 0.1932 |
126
+ | 0.2052 | 8.32 | 13000 | 0.2117 | 0.1878 |
127
+ | 0.1976 | 8.96 | 14000 | 0.2104 | 0.1825 |
128
+ | 0.1845 | 9.6 | 15000 | 0.2156 | 0.1821 |
129
+
130
+
131
+ ### Framework versions
132
+
133
+ - Transformers 4.16.2
134
+ - Pytorch 1.10.2+cu113
135
+ - Datasets 1.18.3
136
+ - Tokenizers 0.11.0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_loss": 0.21412786841392517,
4
+ "eval_runtime": 70.9089,
5
+ "eval_samples": 10388,
6
+ "eval_samples_per_second": 146.498,
7
+ "eval_steps_per_second": 1.833,
8
+ "eval_wer": 0.18078979457836977,
9
+ "train_loss": 0.1316310991176183,
10
+ "train_runtime": 23113.6031,
11
+ "train_samples": 399991,
12
+ "train_samples_per_second": 173.054,
13
+ "train_steps_per_second": 0.676
14
+ }
config.json CHANGED
@@ -6,7 +6,7 @@
6
  "add_adapter": false,
7
  "apply_spec_augment": true,
8
  "architectures": [
9
- "Wav2Vec2ForPreTraining"
10
  ],
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 1,
 
6
  "add_adapter": false,
7
  "apply_spec_augment": true,
8
  "architectures": [
9
+ "Wav2Vec2ForCTC"
10
  ],
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 1,
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_loss": 0.21412786841392517,
4
+ "eval_runtime": 70.9089,
5
+ "eval_samples": 10388,
6
+ "eval_samples_per_second": 146.498,
7
+ "eval_steps_per_second": 1.833,
8
+ "eval_wer": 0.18078979457836977
9
+ }
log_mozilla-foundation_common_voice_8_0_ar_test_predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
mozilla-foundation_common_voice_8_0_ar_test_eval_results.txt CHANGED
@@ -1,2 +1,2 @@
1
- WER: 0.18855042016806722
2
- CER: 0.05138746531806014
 
1
+ WER: 0.18172268907563024
2
+ CER: 0.04875182561226061
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2720401989ca4dbdc57ad7199cdc4116d02c36c374567e9a1295b01cd6c45b3
3
- size 625461417
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3299e75237418a55c403fc9c78801934d3057ff076e484879bc40bfd516f13ba
3
  size 1262112241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:588e6341d51008b353be1115b1e1e34d86bad4f676b32277cba57e5f7cff526a
3
  size 1262112241
rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:39223200ea0fd8350b41518c6492d3d14d96d823ca3f6e9a374864389e6dbb1b
3
- size 14503
 
 
 
 
rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5091f67e4a1caff5227497b7c37712739cd8baf0783971573e46569913fdfff3
3
- size 14567
 
 
 
 
rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:16de60f262d42506479df2d553aa073b64cf621ae9eebfc35fbc2b23021cee2d
3
- size 14503
 
 
 
 
rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a641931222bcf2b19d6a073bd70c446b59abafb3a4077c2d6a5aea1f4001e06a
3
- size 14503
 
 
 
 
rng_state_4.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:da3f79cf3a4b4d9379e47a4353e6555235b22515af826d448849df6d7a5f04f0
3
- size 14567
 
 
 
 
rng_state_5.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:16352761baa415d3af2ff3cea0ef555419a76b51449f27bcec56bcdba9a15ff5
3
- size 14567
 
 
 
 
rng_state_6.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ea93d555065f78f93a91b315329c1c392289d935801de6b732e124d18b1586a
3
- size 14567
 
 
 
 
rng_state_7.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:46629c3fa6cb14bd2274cb93b2b9a613c84794d3912a7fb43cf8af9b51524544
3
- size 14503
 
 
 
 
scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:431319fd21daa87636d8253400f763a0a1a5400306ce1db9e67b38942c76551d
3
- size 559
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f90f0da980bf8cd346163915335c996f7bbdbcfe93a2e624a8b02f902aa6d01
3
- size 623
 
 
 
 
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.1316310991176183,
4
+ "train_runtime": 23113.6031,
5
+ "train_samples": 399991,
6
+ "train_samples_per_second": 173.054,
7
+ "train_steps_per_second": 0.676
8
+ }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.18776850201669637,
3
- "best_model_checkpoint": "/workspace/cv-corpus-8.0-2022-01-19/output/checkpoint-13000",
4
- "epoch": 8.317338451695457,
5
- "global_step": 13000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -279,11 +279,68 @@
279
  "eval_steps_per_second": 1.836,
280
  "eval_wer": 0.18776850201669637,
281
  "step": 13000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  }
283
  ],
284
  "max_steps": 15630,
285
  "num_train_epochs": 10,
286
- "total_flos": 8.268631785694088e+20,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
 
1
  {
2
+ "best_metric": 0.4493387111903199,
3
+ "best_model_checkpoint": "/workspace/cv-corpus-8.0-2022-01-19/output/checkpoint-1000",
4
+ "epoch": 10.0,
5
+ "global_step": 15630,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
279
  "eval_steps_per_second": 1.836,
280
  "eval_wer": 0.18776850201669637,
281
  "step": 13000
282
+ },
283
+ {
284
+ "epoch": 8.64,
285
+ "learning_rate": 2.9241285030758714e-05,
286
+ "loss": 0.2026,
287
+ "step": 13500
288
+ },
289
+ {
290
+ "epoch": 8.96,
291
+ "learning_rate": 2.2406015037593985e-05,
292
+ "loss": 0.1976,
293
+ "step": 14000
294
+ },
295
+ {
296
+ "epoch": 8.96,
297
+ "eval_loss": 0.21043309569358826,
298
+ "eval_runtime": 71.1895,
299
+ "eval_samples_per_second": 145.92,
300
+ "eval_steps_per_second": 1.826,
301
+ "eval_wer": 0.18249695150548728,
302
+ "step": 14000
303
+ },
304
+ {
305
+ "epoch": 9.28,
306
+ "learning_rate": 1.5570745044429256e-05,
307
+ "loss": 0.1875,
308
+ "step": 14500
309
+ },
310
+ {
311
+ "epoch": 9.6,
312
+ "learning_rate": 8.735475051264526e-06,
313
+ "loss": 0.1845,
314
+ "step": 15000
315
+ },
316
+ {
317
+ "epoch": 9.6,
318
+ "eval_loss": 0.21563756465911865,
319
+ "eval_runtime": 71.0722,
320
+ "eval_samples_per_second": 146.161,
321
+ "eval_steps_per_second": 1.829,
322
+ "eval_wer": 0.18212175218084609,
323
+ "step": 15000
324
+ },
325
+ {
326
+ "epoch": 9.92,
327
+ "learning_rate": 1.9138755980861244e-06,
328
+ "loss": 0.1837,
329
+ "step": 15500
330
+ },
331
+ {
332
+ "epoch": 10.0,
333
+ "step": 15630,
334
+ "total_flos": 9.942412569719006e+20,
335
+ "train_loss": 0.1316310991176183,
336
+ "train_runtime": 23113.6031,
337
+ "train_samples_per_second": 173.054,
338
+ "train_steps_per_second": 0.676
339
  }
340
  ],
341
  "max_steps": 15630,
342
  "num_train_epochs": 10,
343
+ "total_flos": 9.942412569719006e+20,
344
  "trial_name": null,
345
  "trial_params": null
346
  }