tawreck-hasaballah committed
Commit: ec2c920
Parent(s): 3644724

End of training

Files changed:
- README.md +11 -10
- generation_config.json +3 -11
- model.safetensors +1 -1
README.md
CHANGED
@@ -19,8 +19,8 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the ASR-EGARBCSC: AN EGYPTIAN ARABIC CONVERSATIONAL SPEECH CORPUS dataset.
 It achieves the following results on the evaluation set:
-- Loss:
-- Wer:
+- Loss: 0.5626
+- Wer: 47.4960
 
 ## Model description
 
@@ -46,20 +46,21 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant_with_warmup
 - lr_scheduler_warmup_steps: 50
-- training_steps:
+- training_steps: 500
 
 ### Training results
 
-| Training Loss | Epoch
-|
-|
-| 0.
-| 0.
+| Training Loss | Epoch  | Step | Validation Loss | Wer     |
+|:-------------:|:------:|:----:|:---------------:|:-------:|
+| 0.7309        | 0.7267 | 125  | 0.5984          | 52.4512 |
+| 0.3608        | 1.4535 | 250  | 0.5488          | 48.6031 |
+| 0.1789        | 2.1802 | 375  | 0.5537          | 46.5999 |
+| 0.1844        | 2.9070 | 500  | 0.5626          | 47.4960 |
 
 
 ### Framework versions
 
-- Transformers 4.
+- Transformers 4.40.0
 - Pytorch 2.2.1+cu121
 - Datasets 2.18.0
-- Tokenizers 0.
+- Tokenizers 0.19.1
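Note on the hyperparameters visible in this hunk: the optimizer, scheduler, warmup, and total steps map onto Seq2SeqTrainingArguments roughly as sketched below. The learning rate, batch sizes, and output directory are not part of this diff, so the values used for them here are illustrative placeholders only; the 125-step eval cadence is inferred from the Step column of the results table.

```python
# Rough Seq2SeqTrainingArguments sketch matching the hyperparameters in the hunk above.
# learning_rate, batch size, and output_dir are NOT in this diff; they are placeholders.
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="./whisper-small-egyptian-arabic",  # placeholder
    max_steps=500,                                 # training_steps: 500
    warmup_steps=50,                               # lr_scheduler_warmup_steps: 50
    lr_scheduler_type="constant_with_warmup",
    adam_beta1=0.9,                                # Adam betas=(0.9, 0.999)
    adam_beta2=0.999,
    adam_epsilon=1e-8,                             # epsilon=1e-08
    learning_rate=1e-5,                            # placeholder, not shown in this hunk
    per_device_train_batch_size=16,                # placeholder, not shown in this hunk
    evaluation_strategy="steps",
    eval_steps=125,                                # evals every 125 steps, as in the results table
    predict_with_generate=True,
)
```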
generation_config.json
CHANGED
@@ -48,16 +48,6 @@
   "bos_token_id": 50257,
   "decoder_start_token_id": 50258,
   "eos_token_id": 50257,
-  "forced_decoder_ids": [
-    [
-      1,
-      null
-    ],
-    [
-      2,
-      50359
-    ]
-  ],
   "is_multilingual": true,
   "lang_to_id": {
     "<|af|>": 50327,
@@ -160,6 +150,7 @@
     "<|yo|>": 50325,
     "<|zh|>": 50260
   },
+  "language": "arabic",
   "max_initial_timestamp_index": 50,
   "max_length": 448,
   "no_timestamps_token_id": 50363,
@@ -256,9 +247,10 @@
     50361,
     50362
   ],
+  "task": "transcribe",
   "task_to_id": {
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.
+  "transformers_version": "4.40.0"
 }
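This change drops the hard-coded forced_decoder_ids and records the language and task in generation_config.json instead, which is the form newer transformers releases expect for Whisper prompting. A minimal transcription sketch under that config could look like the following; the repository id is a placeholder (the commit page does not show the repo name) and the audio array is a stand-in.

```python
# Minimal Whisper transcription sketch reflecting the updated generation_config.json:
# language/task are passed to generate() rather than hard-coding forced_decoder_ids.
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

model_id = "your-username/whisper-small-egyptian-arabic"  # placeholder repo id
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)

# `audio` should be a 16 kHz mono float waveform; one second of silence stands in here.
audio = np.zeros(16000, dtype=np.float32)
inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    ids = model.generate(
        inputs.input_features,
        language="arabic",  # "language": "arabic" in generation_config.json
        task="transcribe",  # "task": "transcribe"
    )
print(processor.batch_decode(ids, skip_special_tokens=True)[0])
```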
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:126e5a3097bebc4510f3139e24f62020f8d05fb7d16552970017b33734468391
 size 966995080
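The model.safetensors entry is a Git LFS pointer, so only its oid and size are versioned here; the weights themselves live in LFS storage. A downloaded copy can be checked against this pointer with a small hash sketch like the one below (the local path is a placeholder).

```python
# Verify a downloaded model.safetensors against the Git LFS pointer above.
import hashlib

EXPECTED_SHA256 = "126e5a3097bebc4510f3139e24f62020f8d05fb7d16552970017b33734468391"
EXPECTED_SIZE = 966_995_080  # bytes, from the pointer's "size" line

def matches_pointer(path: str) -> bool:
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == EXPECTED_SHA256 and size == EXPECTED_SIZE

print(matches_pointer("model.safetensors"))  # placeholder local path
```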