Mihaj committed on
Commit
e6cd4e9
·
verified ·
1 Parent(s): e9f142f

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +4 -32
tokenizer_config.json CHANGED
@@ -12976,43 +12976,15 @@
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
- "bos_token": {
12980
- "__type": "AddedToken",
12981
- "content": "<|endoftext|>",
12982
- "lstrip": false,
12983
- "normalized": true,
12984
- "rstrip": false,
12985
- "single_word": false
12986
- },
12987
  "clean_up_tokenization_spaces": true,
12988
  "dropout": 0.2,
12989
- "eos_token": {
12990
- "__type": "AddedToken",
12991
- "content": "<|endoftext|>",
12992
- "lstrip": false,
12993
- "normalized": true,
12994
- "rstrip": false,
12995
- "single_word": false
12996
- },
12997
  "errors": "replace",
12998
  "model_max_length": 1024,
12999
- "pad_token": {
13000
- "__type": "AddedToken",
13001
- "content": "<|endoftext|>",
13002
- "lstrip": false,
13003
- "normalized": true,
13004
- "rstrip": false,
13005
- "single_word": false
13006
- },
13007
  "processor_class": "WhisperProcessor",
13008
  "return_attention_mask": false,
13009
  "tokenizer_class": "WhisperTokenizer",
13010
- "unk_token": {
13011
- "__type": "AddedToken",
13012
- "content": "<|endoftext|>",
13013
- "lstrip": false,
13014
- "normalized": true,
13015
- "rstrip": false,
13016
- "single_word": false
13017
- }
13018
  }
 
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
+ "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12980
  "clean_up_tokenization_spaces": true,
12981
  "dropout": 0.2,
12982
+ "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12983
  "errors": "replace",
12984
  "model_max_length": 1024,
12985
+ "pad_token": "<|endoftext|>",
 
 
 
 
 
 
 
12986
  "processor_class": "WhisperProcessor",
12987
  "return_attention_mask": false,
12988
  "tokenizer_class": "WhisperTokenizer",
12989
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
12990
  }