new proc
Browse files
- added_tokens.json +1 -2
- special_tokens_map.json +2 -2
- tokenizer_config.json +2 -2
- vocab.json +1 -0
added_tokens.json
CHANGED
@@ -17,7 +17,6 @@
|
|
17 |
"<|da|>": 50285,
|
18 |
"<|de|>": 50261,
|
19 |
"<|el|>": 50281,
|
20 |
-
"<|endoftext|>": 50257,
|
21 |
"<|en|>": 50259,
|
22 |
"<|es|>": 50262,
|
23 |
"<|et|>": 50307,
|
@@ -30,6 +29,7 @@
|
|
30 |
"<|gu|>": 50333,
|
31 |
"<|haw|>": 50352,
|
32 |
"<|ha|>": 50354,
|
|
|
33 |
"<|hi|>": 50276,
|
34 |
"<|hr|>": 50291,
|
35 |
"<|ht|>": 50339,
|
@@ -38,7 +38,6 @@
|
|
38 |
"<|id|>": 50275,
|
39 |
"<|is|>": 50311,
|
40 |
"<|it|>": 50274,
|
41 |
-
"<|iw|>": 50279,
|
42 |
"<|ja|>": 50266,
|
43 |
"<|jw|>": 50356,
|
44 |
"<|ka|>": 50329,
|
|
|
17 |
"<|da|>": 50285,
|
18 |
"<|de|>": 50261,
|
19 |
"<|el|>": 50281,
|
|
|
20 |
"<|en|>": 50259,
|
21 |
"<|es|>": 50262,
|
22 |
"<|et|>": 50307,
|
|
|
29 |
"<|gu|>": 50333,
|
30 |
"<|haw|>": 50352,
|
31 |
"<|ha|>": 50354,
|
32 |
+
"<|he|>": 50279,
|
33 |
"<|hi|>": 50276,
|
34 |
"<|hr|>": 50291,
|
35 |
"<|ht|>": 50339,
|
|
|
38 |
"<|id|>": 50275,
|
39 |
"<|is|>": 50311,
|
40 |
"<|it|>": 50274,
|
|
|
41 |
"<|ja|>": 50266,
|
42 |
"<|jw|>": 50356,
|
43 |
"<|ka|>": 50329,
|
special_tokens_map.json
CHANGED
@@ -22,7 +22,7 @@
|
|
22 |
"<|hi|>",
|
23 |
"<|fi|>",
|
24 |
"<|vi|>",
|
25 |
-
"<|iw|>",
|
26 |
"<|uk|>",
|
27 |
"<|el|>",
|
28 |
"<|ms|>",
|
@@ -124,7 +124,7 @@
|
|
124 |
},
|
125 |
"pad_token": "<|endoftext|>",
|
126 |
"unk_token": {
|
127 |
-
"content": "",
|
128 |
"lstrip": false,
|
129 |
"normalized": true,
|
130 |
"rstrip": false,
|
|
|
22 |
"<|hi|>",
|
23 |
"<|fi|>",
|
24 |
"<|vi|>",
|
25 |
+
"<|he|>",
|
26 |
"<|uk|>",
|
27 |
"<|el|>",
|
28 |
"<|ms|>",
|
|
|
124 |
},
|
125 |
"pad_token": "<|endoftext|>",
|
126 |
"unk_token": {
|
127 |
+
"content": "<|endoftext|>",
|
128 |
"lstrip": false,
|
129 |
"normalized": true,
|
130 |
"rstrip": false,
|
tokenizer_config.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"single_word": false
|
20 |
},
|
21 |
"errors": "replace",
|
22 |
-
"model_max_length":
|
23 |
"pad_token": null,
|
24 |
"processor_class": "WhisperProcessor",
|
25 |
"return_attention_mask": false,
|
@@ -27,7 +27,7 @@
|
|
27 |
"trust_remote_code": false,
|
28 |
"unk_token": {
|
29 |
"__type": "AddedToken",
|
30 |
-
"content": "",
|
31 |
"lstrip": false,
|
32 |
"normalized": true,
|
33 |
"rstrip": false,
|
|
|
19 |
"single_word": false
|
20 |
},
|
21 |
"errors": "replace",
|
22 |
+
"model_max_length": 1024,
|
23 |
"pad_token": null,
|
24 |
"processor_class": "WhisperProcessor",
|
25 |
"return_attention_mask": false,
|
|
|
27 |
"trust_remote_code": false,
|
28 |
"unk_token": {
|
29 |
"__type": "AddedToken",
|
30 |
+
"content": "<|endoftext|>",
|
31 |
"lstrip": false,
|
32 |
"normalized": true,
|
33 |
"rstrip": false,
|
vocab.json
CHANGED
@@ -314,6 +314,7 @@
|
|
314 |
";;": 35746,
|
315 |
"<": 27,
|
316 |
"</": 3433,
|
|
|
317 |
"=": 28,
|
318 |
"=\"": 13114,
|
319 |
"=\"#": 34106,
|
|
|
314 |
";;": 35746,
|
315 |
"<": 27,
|
316 |
"</": 3433,
|
317 |
+
"<|endoftext|>": 50257,
|
318 |
"=": 28,
|
319 |
"=\"": 13114,
|
320 |
"=\"#": 34106,
|