Add tokenizer files

Browse files

Files changed (4)

config.json +3 -3
pytorch_model.bin +1 -1
special_tokens_map.json +1 -1
tokenizer_config.json +1 -1

config.json CHANGED Viewed

@@ -3,9 +3,9 @@
     "CanineModel"
   ],
   "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
   "downsampling_rate": 4,
-  "eos_token_id": 2,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -19,7 +19,7 @@
   "num_hash_buckets": 16384,
   "num_hash_functions": 8,
   "num_hidden_layers": 12,
-  "pad_token_id": 1,
   "transformers_version": "4.7.0.dev0",
   "type_vocab_size": 16,
   "upsampling_kernel_size": 4,

     "CanineModel"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 57344,
   "downsampling_rate": 4,
+  "eos_token_id": 57345,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "num_hash_buckets": 16384,
   "num_hash_functions": 8,
   "num_hidden_layers": 12,
+  "pad_token_id": 0,
   "transformers_version": "4.7.0.dev0",
   "type_vocab_size": 16,
   "upsampling_kernel_size": 4,

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6086b787010105aee1c8c2bc3048271b58c81adc0021d898bc2830b271556301
 size 528561767

 version https://git-lfs.github.com/spec/v1
+oid sha256:2828cdf64dc63f7de96cab8a952395245ac3e011223b9bcd5e4f3679b810ae7a
 size 528561767

special_tokens_map.json CHANGED Viewed

@@ -1 +1 @@
- {}

+ {"bos_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}

tokenizer_config.json CHANGED Viewed

@@ -1 +1 @@
- {}

+ {"bos_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048}