Upload processor
Browse files
- README.md +4 -4
- added_tokens.json +1 -2
- special_tokens_map.json +2 -2
- tokenizer_config.json +4 -4
- vocab.json +1 -0
README.md
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
language:
|
3 |
- ar
|
4 |
license: apache-2.0
|
5 |
-
base_model: tarteel-ai/whisper-base-ar-quran
|
6 |
tags:
|
7 |
- generated_from_trainer
|
|
|
8 |
datasets:
|
9 |
- zolfa
|
10 |
metrics:
|
@@ -13,16 +13,16 @@ model-index:
|
|
13 |
- name: Whisper-raghadomar
|
14 |
results:
|
15 |
- task:
|
16 |
-
name: Automatic Speech Recognition
|
17 |
type: automatic-speech-recognition
|
|
|
18 |
dataset:
|
19 |
name: Zolfa Dataset
|
20 |
type: zolfa
|
21 |
args: 'config: ar, split: test'
|
22 |
metrics:
|
23 |
-
- name: Wer
|
24 |
-
type: wer
|
25 |
value: 6.896551724137931
|
|
|
26 |
---
|
27 |
|
28 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
2 |
language:
|
3 |
- ar
|
4 |
license: apache-2.0
|
|
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
+
base_model: tarteel-ai/whisper-base-ar-quran
|
8 |
datasets:
|
9 |
- zolfa
|
10 |
metrics:
|
|
|
13 |
- name: Whisper-raghadomar
|
14 |
results:
|
15 |
- task:
|
|
|
16 |
type: automatic-speech-recognition
|
17 |
+
name: Automatic Speech Recognition
|
18 |
dataset:
|
19 |
name: Zolfa Dataset
|
20 |
type: zolfa
|
21 |
args: 'config: ar, split: test'
|
22 |
metrics:
|
23 |
+
- type: wer
|
|
|
24 |
value: 6.896551724137931
|
25 |
+
name: Wer
|
26 |
---
|
27 |
|
28 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
added_tokens.json
CHANGED
@@ -17,7 +17,6 @@
|
|
17 |
"<|da|>": 50285,
|
18 |
"<|de|>": 50261,
|
19 |
"<|el|>": 50281,
|
20 |
-
"<|endoftext|>": 50257,
|
21 |
"<|en|>": 50259,
|
22 |
"<|es|>": 50262,
|
23 |
"<|et|>": 50307,
|
@@ -30,6 +29,7 @@
|
|
30 |
"<|gu|>": 50333,
|
31 |
"<|haw|>": 50352,
|
32 |
"<|ha|>": 50354,
|
|
|
33 |
"<|hi|>": 50276,
|
34 |
"<|hr|>": 50291,
|
35 |
"<|ht|>": 50339,
|
@@ -38,7 +38,6 @@
|
|
38 |
"<|id|>": 50275,
|
39 |
"<|is|>": 50311,
|
40 |
"<|it|>": 50274,
|
41 |
-
"<|iw|>": 50279,
|
42 |
"<|ja|>": 50266,
|
43 |
"<|jw|>": 50356,
|
44 |
"<|ka|>": 50329,
|
|
|
17 |
"<|da|>": 50285,
|
18 |
"<|de|>": 50261,
|
19 |
"<|el|>": 50281,
|
|
|
20 |
"<|en|>": 50259,
|
21 |
"<|es|>": 50262,
|
22 |
"<|et|>": 50307,
|
|
|
29 |
"<|gu|>": 50333,
|
30 |
"<|haw|>": 50352,
|
31 |
"<|ha|>": 50354,
|
32 |
+
"<|he|>": 50279,
|
33 |
"<|hi|>": 50276,
|
34 |
"<|hr|>": 50291,
|
35 |
"<|ht|>": 50339,
|
|
|
38 |
"<|id|>": 50275,
|
39 |
"<|is|>": 50311,
|
40 |
"<|it|>": 50274,
|
|
|
41 |
"<|ja|>": 50266,
|
42 |
"<|jw|>": 50356,
|
43 |
"<|ka|>": 50329,
|
special_tokens_map.json
CHANGED
@@ -22,7 +22,7 @@
|
|
22 |
"<|hi|>",
|
23 |
"<|fi|>",
|
24 |
"<|vi|>",
|
25 |
-
"<|iw|>",
|
26 |
"<|uk|>",
|
27 |
"<|el|>",
|
28 |
"<|ms|>",
|
@@ -130,7 +130,7 @@
|
|
130 |
"single_word": false
|
131 |
},
|
132 |
"unk_token": {
|
133 |
-
"content": "",
|
134 |
"lstrip": false,
|
135 |
"normalized": true,
|
136 |
"rstrip": false,
|
|
|
22 |
"<|hi|>",
|
23 |
"<|fi|>",
|
24 |
"<|vi|>",
|
25 |
+
"<|he|>",
|
26 |
"<|uk|>",
|
27 |
"<|el|>",
|
28 |
"<|ms|>",
|
|
|
130 |
"single_word": false
|
131 |
},
|
132 |
"unk_token": {
|
133 |
+
"content": "<|endoftext|>",
|
134 |
"lstrip": false,
|
135 |
"normalized": true,
|
136 |
"rstrip": false,
|
tokenizer_config.json
CHANGED
@@ -179,7 +179,7 @@
|
|
179 |
"special": true
|
180 |
},
|
181 |
"50279": {
|
182 |
-
"content": "<|iw|>",
|
183 |
"lstrip": false,
|
184 |
"normalized": false,
|
185 |
"rstrip": false,
|
@@ -882,7 +882,7 @@
|
|
882 |
"<|hi|>",
|
883 |
"<|fi|>",
|
884 |
"<|vi|>",
|
885 |
-
"<|iw|>",
|
886 |
"<|uk|>",
|
887 |
"<|el|>",
|
888 |
"<|ms|>",
|
@@ -972,10 +972,10 @@
|
|
972 |
"clean_up_tokenization_spaces": true,
|
973 |
"eos_token": "<|endoftext|>",
|
974 |
"errors": "replace",
|
975 |
-
"model_max_length":
|
976 |
"pad_token": "<|endoftext|>",
|
977 |
"processor_class": "WhisperProcessor",
|
978 |
"return_attention_mask": false,
|
979 |
"tokenizer_class": "WhisperTokenizer",
|
980 |
-
"unk_token": ""
|
981 |
}
|
|
|
179 |
"special": true
|
180 |
},
|
181 |
"50279": {
|
182 |
+
"content": "<|he|>",
|
183 |
"lstrip": false,
|
184 |
"normalized": false,
|
185 |
"rstrip": false,
|
|
|
882 |
"<|hi|>",
|
883 |
"<|fi|>",
|
884 |
"<|vi|>",
|
885 |
+
"<|he|>",
|
886 |
"<|uk|>",
|
887 |
"<|el|>",
|
888 |
"<|ms|>",
|
|
|
972 |
"clean_up_tokenization_spaces": true,
|
973 |
"eos_token": "<|endoftext|>",
|
974 |
"errors": "replace",
|
975 |
+
"model_max_length": 1024,
|
976 |
"pad_token": "<|endoftext|>",
|
977 |
"processor_class": "WhisperProcessor",
|
978 |
"return_attention_mask": false,
|
979 |
"tokenizer_class": "WhisperTokenizer",
|
980 |
+
"unk_token": "<|endoftext|>"
|
981 |
}
|
vocab.json
CHANGED
@@ -314,6 +314,7 @@
|
|
314 |
";;": 35746,
|
315 |
"<": 27,
|
316 |
"</": 3433,
|
|
|
317 |
"=": 28,
|
318 |
"=\"": 13114,
|
319 |
"=\"#": 34106,
|
|
|
314 |
";;": 35746,
|
315 |
"<": 27,
|
316 |
"</": 3433,
|
317 |
+
"<|endoftext|>": 50257,
|
318 |
"=": 28,
|
319 |
"=\"": 13114,
|
320 |
"=\"#": 34106,
|