kisejin committed on
Commit
9f0d6ae
1 Parent(s): 3c7e743

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +4 -4
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -958,8 +958,8 @@
958
  {
959
  "type": "Metaspace",
960
  "replacement": "▁",
961
- "prepend_scheme": "always",
962
- "split": true
963
  }
964
  ]
965
  },
@@ -1020,8 +1020,8 @@
1020
  "decoder": {
1021
  "type": "Metaspace",
1022
  "replacement": "▁",
1023
- "prepend_scheme": "always",
1024
- "split": true
1025
  },
1026
  "model": {
1027
  "type": "Unigram",
 
958
  {
959
  "type": "Metaspace",
960
  "replacement": "▁",
961
+ "add_prefix_space": true,
962
+ "prepend_scheme": "always"
963
  }
964
  ]
965
  },
 
1020
  "decoder": {
1021
  "type": "Metaspace",
1022
  "replacement": "▁",
1023
+ "add_prefix_space": true,
1024
+ "prepend_scheme": "always"
1025
  },
1026
  "model": {
1027
  "type": "Unigram",
tokenizer_config.json CHANGED
@@ -930,7 +930,7 @@
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
- "model_max_length": 1000000000000000019884624838656,
934
  "pad_token": "<pad>",
935
  "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"
 
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
+ "model_max_length": 512,
934
  "pad_token": "<pad>",
935
  "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"