hyunwoongko committed on
Commit
2b2e192
1 Parent(s): 7b6a34b

Upload 3 files

Browse files
special_tokens_map.json CHANGED
@@ -1 +1,11 @@
1
- {"eos_token": "<eos>", "pad_token": "<pad>", "additional_special_tokens": ["<|endoftext|>", "<|sep|>", "<|acc|>", "<|tel|>", "<|rrn|>"]}
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "<|sep|>",
5
+ "<|acc|>",
6
+ "<|tel|>",
7
+ "<|rrn|>"
8
+ ],
9
+ "eos_token": "<|endoftext|>",
10
+ "pad_token": "<|endoftext|>"
11
+ }
tokenizer.json CHANGED
@@ -5,7 +5,7 @@
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
- "content": "<pad>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 1,
17
- "content": "<eos>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -107,8 +107,8 @@
107
  "end_of_word_suffix": null,
108
  "fuse_unk": false,
109
  "vocab": {
110
- "<pad>": 0,
111
- "<eos>": 1,
112
  "<|endoftext|>": 2,
113
  "<|sep|>": 3,
114
  "!": 4,
@@ -59851,4 +59851,4 @@
59851
  "ìºIJ롤ëĿ¼ ìĿ´ëĤĺ"
59852
  ]
59853
  }
59854
- }
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
+ "content": "<|unused0|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
 
14
  },
15
  {
16
  "id": 1,
17
+ "content": "<|unused1|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
107
  "end_of_word_suffix": null,
108
  "fuse_unk": false,
109
  "vocab": {
110
+ "<|unused0|>": 0,
111
+ "<|unused1|>": 1,
112
  "<|endoftext|>": 2,
113
  "<|sep|>": 3,
114
  "!": 4,
 
59851
  "ìºIJ롤ëĿ¼ ìĿ´ëĤĺ"
59852
  ]
59853
  }
59854
+ }
tokenizer_config.json CHANGED
@@ -1 +1,6 @@
1
- {"pad_token": "<pad>", "eos_token": "<eos>", "tokenizer_class": "PreTrainedTokenizerFast"}
 
 
 
 
 
 
1
+ {
2
+ "name_or_path": "EleutherAI/polyglot-ko-5.8b",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "tokenizer_class": "PreTrainedTokenizerFast"
6
+ }