AmirMohseni committed
Commit 7312d64
Parent: e99b726

Upload tokenizer

special_tokens_map.json CHANGED
@@ -13,5 +13,11 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|eot_id|>"
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
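
The hunk above replaces the reuse of <|eot_id|> as the padding token with a dedicated [PAD] special token. A minimal sketch of how such a change is typically produced with the transformers API; the base checkpoint and output directory names are assumptions, not taken from this commit:

from transformers import AutoTokenizer

# Hypothetical base checkpoint; the source repo is not named in this commit.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")

# Register a dedicated pad token instead of reusing <|eot_id|> (id 128009).
num_added = tokenizer.add_special_tokens({"pad_token": "[PAD]"})
print(num_added, tokenizer.pad_token, tokenizer.pad_token_id)  # expected: 1 [PAD] 128256

# save_pretrained rewrites special_tokens_map.json, tokenizer.json and
# tokenizer_config.json, producing diffs like the ones in this commit.
tokenizer.save_pretrained("upload")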
tokenizer.json CHANGED
@@ -10,9 +10,9 @@
     "strategy": "BatchLongest",
     "direction": "Right",
     "pad_to_multiple_of": null,
-    "pad_id": 128009,
+    "pad_id": 128256,
     "pad_type_id": 0,
-    "pad_token": "<|eot_id|>"
+    "pad_token": "[PAD]"
   },
   "added_tokens": [
     {
@@ -2318,6 +2318,15 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 128256,
+      "content": "[PAD]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
tokenizer_config.json CHANGED
@@ -2047,6 +2047,14 @@
       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "128256": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
   "bos_token": "<|begin_of_text|>",
@@ -2058,6 +2066,6 @@
     "attention_mask"
   ],
   "model_max_length": 131072,
-  "pad_token": "<|eot_id|>",
+  "pad_token": "[PAD]",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
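
One consequence the tokenizer files alone cannot express: id 128256 lies one row past the original embedding table, so a model fine-tuned with this tokenizer needs its embeddings resized first. A sketch under the same assumptions as above (hypothetical paths, assumed Llama-3.1 base checkpoint):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./upload")  # hypothetical local path
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")  # assumed base

# Grow the embedding (and tied lm_head) from 128256 to 128257 rows so [PAD] has a row,
# and record the pad id in the model config.
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id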