shhossain committed on
Commit
f0f80e9
1 Parent(s): bfe35b1
added_tokens.json CHANGED
@@ -17,7 +17,6 @@
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
20
- "<|endoftext|>": 50257,
21
  "<|en|>": 50259,
22
  "<|es|>": 50262,
23
  "<|et|>": 50307,
@@ -30,6 +29,7 @@
30
  "<|gu|>": 50333,
31
  "<|haw|>": 50352,
32
  "<|ha|>": 50354,
 
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
@@ -38,7 +38,6 @@
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
41
- "<|iw|>": 50279,
42
  "<|ja|>": 50266,
43
  "<|jw|>": 50356,
44
  "<|ka|>": 50329,
 
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
 
20
  "<|en|>": 50259,
21
  "<|es|>": 50262,
22
  "<|et|>": 50307,
 
29
  "<|gu|>": 50333,
30
  "<|haw|>": 50352,
31
  "<|ha|>": 50354,
32
+ "<|he|>": 50279,
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
 
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
 
41
  "<|ja|>": 50266,
42
  "<|jw|>": 50356,
43
  "<|ka|>": 50329,
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
- "<|iw|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
@@ -124,7 +124,7 @@
124
  },
125
  "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
- "content": "",
128
  "lstrip": false,
129
  "normalized": true,
130
  "rstrip": false,
 
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
+ "<|he|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
 
124
  },
125
  "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
+ "content": "<|endoftext|>",
128
  "lstrip": false,
129
  "normalized": true,
130
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -19,7 +19,7 @@
19
  "single_word": false
20
  },
21
  "errors": "replace",
22
- "model_max_length": 448,
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
@@ -27,7 +27,7 @@
27
  "trust_remote_code": false,
28
  "unk_token": {
29
  "__type": "AddedToken",
30
- "content": "",
31
  "lstrip": false,
32
  "normalized": true,
33
  "rstrip": false,
 
19
  "single_word": false
20
  },
21
  "errors": "replace",
22
+ "model_max_length": 1024,
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
 
27
  "trust_remote_code": false,
28
  "unk_token": {
29
  "__type": "AddedToken",
30
+ "content": "<|endoftext|>",
31
  "lstrip": false,
32
  "normalized": true,
33
  "rstrip": false,
vocab.json CHANGED
@@ -314,6 +314,7 @@
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
 
317
  "=": 28,
318
  "=\"": 13114,
319
  "=\"#": 34106,
 
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
317
+ "<|endoftext|>": 50257,
318
  "=": 28,
319
  "=\"": 13114,
320
  "=\"#": 34106,