tawreck-hasaballah committed
Commit: ec2c920
Parent(s): 3644724

End of training

Files changed:
- README.md +11 -10
- generation_config.json +3 -11
- model.safetensors +1 -1
README.md
CHANGED
@@ -19,8 +19,8 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the ASR-EGARBCSC: AN EGYPTIAN ARABIC CONVERSATIONAL SPEECH CORPUS dataset.
 It achieves the following results on the evaluation set:
-- Loss:
-- Wer:
+- Loss: 0.5626
+- Wer: 47.4960
 
 ## Model description
 
@@ -46,20 +46,21 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant_with_warmup
 - lr_scheduler_warmup_steps: 50
-- training_steps:
+- training_steps: 500
 
 ### Training results
 
-| Training Loss | Epoch
-|
-|
-| 0.
-| 0.
+| Training Loss | Epoch  | Step | Validation Loss | Wer     |
+|:-------------:|:------:|:----:|:---------------:|:-------:|
+| 0.7309        | 0.7267 | 125  | 0.5984          | 52.4512 |
+| 0.3608        | 1.4535 | 250  | 0.5488          | 48.6031 |
+| 0.1789        | 2.1802 | 375  | 0.5537          | 46.5999 |
+| 0.1844        | 2.9070 | 500  | 0.5626          | 47.4960 |
 
 
 ### Framework versions
 
-- Transformers 4.
+- Transformers 4.40.0
 - Pytorch 2.2.1+cu121
 - Datasets 2.18.0
-- Tokenizers 0.
+- Tokenizers 0.19.1
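Note on the hyperparameters visible in this hunk: the optimizer, scheduler, warmup, and total steps map onto Seq2SeqTrainingArguments roughly as sketched below. The learning rate, batch sizes, and output directory are not part of this diff, so the values used for them here are illustrative placeholders only; the 125-step eval cadence is inferred from the Step column of the results table.

```python
# Rough Seq2SeqTrainingArguments sketch matching the hyperparameters in the hunk above.
# learning_rate, batch size, and output_dir are NOT in this diff; they are placeholders.
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="./whisper-small-egyptian-arabic",  # placeholder
    max_steps=500,                                 # training_steps: 500
    warmup_steps=50,                               # lr_scheduler_warmup_steps: 50
    lr_scheduler_type="constant_with_warmup",
    adam_beta1=0.9,                                # Adam betas=(0.9, 0.999)
    adam_beta2=0.999,
    adam_epsilon=1e-8,                             # epsilon=1e-08
    learning_rate=1e-5,                            # placeholder, not shown in this hunk
    per_device_train_batch_size=16,                # placeholder, not shown in this hunk
    evaluation_strategy="steps",
    eval_steps=125,                                # evals every 125 steps, as in the results table
    predict_with_generate=True,
)
```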
generation_config.json
CHANGED
@@ -48,16 +48,6 @@
   "bos_token_id": 50257,
   "decoder_start_token_id": 50258,
   "eos_token_id": 50257,
-  "forced_decoder_ids": [
-    [
-      1,
-      null
-    ],
-    [
-      2,
-      50359
-    ]
-  ],
   "is_multilingual": true,
   "lang_to_id": {
     "<|af|>": 50327,
@@ -160,6 +150,7 @@
     "<|yo|>": 50325,
     "<|zh|>": 50260
   },
+  "language": "arabic",
   "max_initial_timestamp_index": 50,
   "max_length": 448,
   "no_timestamps_token_id": 50363,
@@ -256,9 +247,10 @@
     50361,
     50362
   ],
+  "task": "transcribe",
   "task_to_id": {
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.
+  "transformers_version": "4.40.0"
 }
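This change drops the hard-coded forced_decoder_ids and records the language and task in generation_config.json instead, which is the form newer transformers releases expect for Whisper prompting. A minimal transcription sketch under that config could look like the following; the repository id is a placeholder (the commit page does not show the repo name) and the audio array is a stand-in.

```python
# Minimal Whisper transcription sketch reflecting the updated generation_config.json:
# language/task are passed to generate() rather than hard-coding forced_decoder_ids.
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

model_id = "your-username/whisper-small-egyptian-arabic"  # placeholder repo id
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)

# `audio` should be a 16 kHz mono float waveform; one second of silence stands in here.
audio = np.zeros(16000, dtype=np.float32)
inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    ids = model.generate(
        inputs.input_features,
        language="arabic",  # "language": "arabic" in generation_config.json
        task="transcribe",  # "task": "transcribe"
    )
print(processor.batch_decode(ids, skip_special_tokens=True)[0])
```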
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:126e5a3097bebc4510f3139e24f62020f8d05fb7d16552970017b33734468391
 size 966995080
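The model.safetensors entry is a Git LFS pointer, so only its oid and size are versioned here; the weights themselves live in LFS storage. A downloaded copy can be checked against this pointer with a small hash sketch like the one below (the local path is a placeholder).

```python
# Verify a downloaded model.safetensors against the Git LFS pointer above.
import hashlib

EXPECTED_SHA256 = "126e5a3097bebc4510f3139e24f62020f8d05fb7d16552970017b33734468391"
EXPECTED_SIZE = 966_995_080  # bytes, from the pointer's "size" line

def matches_pointer(path: str) -> bool:
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == EXPECTED_SHA256 and size == EXPECTED_SIZE

print(matches_pointer("model.safetensors"))  # placeholder local path
```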