ZhiyuanChen committed on
Commit
2bb76ad
1 Parent(s): 1dc22f6

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +3 -0
  2. vocab.txt +10 -10
tokenizer_config.json CHANGED
@@ -55,10 +55,13 @@
55
  "bos_token": "<cls>",
56
  "clean_up_tokenization_spaces": true,
57
  "cls_token": "<cls>",
 
58
  "eos_token": "<eos>",
59
  "mask_token": "<mask>",
60
  "model_max_length": 1000000000000000019884624838656,
 
61
  "pad_token": "<pad>",
 
62
  "sep_token": "<eos>",
63
  "tokenizer_class": "RnaTokenizer",
64
  "unk_token": "<unk>"
 
55
  "bos_token": "<cls>",
56
  "clean_up_tokenization_spaces": true,
57
  "cls_token": "<cls>",
58
+ "codon": false,
59
  "eos_token": "<eos>",
60
  "mask_token": "<mask>",
61
  "model_max_length": 1000000000000000019884624838656,
62
+ "nmers": 1,
63
  "pad_token": "<pad>",
64
+ "replace_T_with_U": true,
65
  "sep_token": "<eos>",
66
  "tokenizer_class": "RnaTokenizer",
67
  "unk_token": "<unk>"
vocab.txt CHANGED
@@ -9,18 +9,18 @@ C
9
  G
10
  U
11
  N
12
- I
13
- X
14
- V
15
- H
16
- D
17
- B
18
- M
19
  R
20
- W
21
- S
22
  Y
 
 
23
  K
 
 
 
 
 
24
  .
 
25
  *
26
- -
 
 
9
  G
10
  U
11
  N
 
 
 
 
 
 
 
12
  R
 
 
13
  Y
14
+ S
15
+ W
16
  K
17
+ M
18
+ B
19
+ D
20
+ H
21
+ V
22
  .
23
+ X
24
  *
25
+ -
26
+ I