raghadOmar committed on
Commit
6702e71
·
verified ·
1 Parent(s): 3b304d8

Upload processor

Browse files
README.md CHANGED
@@ -2,9 +2,9 @@
2
  language:
3
  - ar
4
  license: apache-2.0
5
- base_model: tarteel-ai/whisper-base-ar-quran
6
  tags:
7
  - generated_from_trainer
 
8
  datasets:
9
  - zolfa
10
  metrics:
@@ -13,16 +13,16 @@ model-index:
13
  - name: Whisper-raghadomar
14
  results:
15
  - task:
16
- name: Automatic Speech Recognition
17
  type: automatic-speech-recognition
 
18
  dataset:
19
  name: Zolfa Dataset
20
  type: zolfa
21
  args: 'config: ar, split: test'
22
  metrics:
23
- - name: Wer
24
- type: wer
25
  value: 6.896551724137931
 
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
2
  language:
3
  - ar
4
  license: apache-2.0
 
5
  tags:
6
  - generated_from_trainer
7
+ base_model: tarteel-ai/whisper-base-ar-quran
8
  datasets:
9
  - zolfa
10
  metrics:
 
13
  - name: Whisper-raghadomar
14
  results:
15
  - task:
 
16
  type: automatic-speech-recognition
17
+ name: Automatic Speech Recognition
18
  dataset:
19
  name: Zolfa Dataset
20
  type: zolfa
21
  args: 'config: ar, split: test'
22
  metrics:
23
+ - type: wer
 
24
  value: 6.896551724137931
25
+ name: Wer
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
added_tokens.json CHANGED
@@ -17,7 +17,6 @@
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
20
- "<|endoftext|>": 50257,
21
  "<|en|>": 50259,
22
  "<|es|>": 50262,
23
  "<|et|>": 50307,
@@ -30,6 +29,7 @@
30
  "<|gu|>": 50333,
31
  "<|haw|>": 50352,
32
  "<|ha|>": 50354,
 
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
@@ -38,7 +38,6 @@
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
41
- "<|iw|>": 50279,
42
  "<|ja|>": 50266,
43
  "<|jw|>": 50356,
44
  "<|ka|>": 50329,
 
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
 
20
  "<|en|>": 50259,
21
  "<|es|>": 50262,
22
  "<|et|>": 50307,
 
29
  "<|gu|>": 50333,
30
  "<|haw|>": 50352,
31
  "<|ha|>": 50354,
32
+ "<|he|>": 50279,
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
 
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
 
41
  "<|ja|>": 50266,
42
  "<|jw|>": 50356,
43
  "<|ka|>": 50329,
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
- "<|iw|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
@@ -130,7 +130,7 @@
130
  "single_word": false
131
  },
132
  "unk_token": {
133
- "content": "",
134
  "lstrip": false,
135
  "normalized": true,
136
  "rstrip": false,
 
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
+ "<|he|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
 
130
  "single_word": false
131
  },
132
  "unk_token": {
133
+ "content": "<|endoftext|>",
134
  "lstrip": false,
135
  "normalized": true,
136
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -179,7 +179,7 @@
179
  "special": true
180
  },
181
  "50279": {
182
- "content": "<|iw|>",
183
  "lstrip": false,
184
  "normalized": false,
185
  "rstrip": false,
@@ -882,7 +882,7 @@
882
  "<|hi|>",
883
  "<|fi|>",
884
  "<|vi|>",
885
- "<|iw|>",
886
  "<|uk|>",
887
  "<|el|>",
888
  "<|ms|>",
@@ -972,10 +972,10 @@
972
  "clean_up_tokenization_spaces": true,
973
  "eos_token": "<|endoftext|>",
974
  "errors": "replace",
975
- "model_max_length": 448,
976
  "pad_token": "<|endoftext|>",
977
  "processor_class": "WhisperProcessor",
978
  "return_attention_mask": false,
979
  "tokenizer_class": "WhisperTokenizer",
980
- "unk_token": ""
981
  }
 
179
  "special": true
180
  },
181
  "50279": {
182
+ "content": "<|he|>",
183
  "lstrip": false,
184
  "normalized": false,
185
  "rstrip": false,
 
882
  "<|hi|>",
883
  "<|fi|>",
884
  "<|vi|>",
885
+ "<|he|>",
886
  "<|uk|>",
887
  "<|el|>",
888
  "<|ms|>",
 
972
  "clean_up_tokenization_spaces": true,
973
  "eos_token": "<|endoftext|>",
974
  "errors": "replace",
975
+ "model_max_length": 1024,
976
  "pad_token": "<|endoftext|>",
977
  "processor_class": "WhisperProcessor",
978
  "return_attention_mask": false,
979
  "tokenizer_class": "WhisperTokenizer",
980
+ "unk_token": "<|endoftext|>"
981
  }
vocab.json CHANGED
@@ -314,6 +314,7 @@
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
 
317
  "=": 28,
318
  "=\"": 13114,
319
  "=\"#": 34106,
 
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
317
+ "<|endoftext|>": 50257,
318
  "=": 28,
319
  "=\"": 13114,
320
  "=\"#": 34106,