Safetensors
English
llama
DNA
RNA
genomic
metagenomic
Commit 370375e
Shangshang Wang committed
1 Parent(s): 19d15ff

Add model and tokenizer files

config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "_name_or_path": "/project/neiswang_1391/MGFM/MGFM-serving/model_ckpts/safetensors/step-00086000",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 3,
+   "eos_token_id": 4,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "mask_token_id": 5,
+   "max_position_embeddings": 512,
+   "max_sequence_length": 2048,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "sep_token_id": 2,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.46.3",
+   "unk_token_id": 1,
+   "use_cache": true,
+   "vocab_size": 1024
+ }
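
The config above describes a Llama-style causal LM (32 layers, hidden size 4096, 32 attention heads) with an unusually small 1,024-entry vocabulary of DNA tokens. A minimal sketch, assuming the committed files sit in a local checkpoint directory (the path below is a placeholder, not part of this commit), for inspecting it with transformers:

```python
# Minimal sketch: inspect the architecture in config.json before loading weights.
# "path/to/checkpoint" is a placeholder directory holding the files from this commit.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("path/to/checkpoint")
print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size, cfg.vocab_size)
# -> llama 32 4096 1024
```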
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 3,
+   "eos_token_id": 4,
+   "pad_token_id": 0,
+   "transformers_version": "4.46.3"
+ }
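
generation_config.json carries only the token ids that generate() needs (BOS 3, EOS 4, PAD 0). A minimal usage sketch, assuming transformers can build a fast tokenizer from the tokenizer.json committed below (placeholder path again):

```python
# Minimal sketch, assuming the checkpoint directory holds all files from this
# commit; generation stops at eos_token_id=4 per generation_config.json.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "path/to/checkpoint"  # placeholder
tok = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float32)
out = model.generate(**tok("ACGTACGT", return_tensors="pt"), max_new_tokens=32)
print(tok.decode(out[0], skip_special_tokens=True))
```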
model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f8e8789e005c63157a5c61e1a5974109136d44259020ac9e779f33a0b487480
+ size 4941093144
model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fa6abadcb468608f19df31644f74c9151c28c8df3007a9b4f7946ea96bb6784
+ size 4991424824
model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ecf5b3c2a7311ca4cb39a8832d276817ff0cafb328f70b1dab12a3a382774b7
+ size 4924315880
model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d481bc4e96ae0b2302931395315a3dc373e556d4269c564d4b306f6a1e5c451e
+ size 4857206904
model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25778d64535c016d32cedf1fceaf13a22bf52b3c557e129c2cdbb723eab017a6
+ size 4857206904
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8abf5e8a7aef8884cf729f8efb97f02f799876e31ab326cb99565e7c52444a39
+ size 1367426832
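
Each shard above is stored as a Git LFS pointer: the repo tracks only the SHA-256 oid and the byte size, while the payload lives in LFS storage. A minimal sketch (stdlib only) for checking a downloaded shard against its pointer:

```python
# Minimal sketch: verify a downloaded shard against the oid in its LFS pointer.
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):  # stream in 1 MiB chunks
            h.update(block)
    return h.hexdigest()

expected = "7f8e8789e005c63157a5c61e1a5974109136d44259020ac9e779f33a0b487480"
assert sha256_of("model-00001-of-00006.safetensors") == expected
```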
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 25938640896
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00006-of-00006.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00006-of-00006.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+     "model.norm.weight": "model-00006-of-00006.safetensors"
+   }
+ }
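
The index maps every tensor name to the shard that stores it; total_size is 25,938,640,896 bytes (~24.2 GiB, consistent with ~6.5B float32 parameters). A minimal sketch for resolving a tensor's shard by hand:

```python
# Minimal sketch: resolve which shard holds a given tensor via the index file.
import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])        # 25938640896 bytes (~24.2 GiB)
print(index["weight_map"]["lm_head.weight"])  # model-00006-of-00006.safetensors
```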
special_tokens_map.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "bos_token": "[BOS]",
+   "eos_token": "[EOS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
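
special_tokens_map.json names the six special tokens; their ids (PAD 0 through MASK 5) are fixed in the tokenizer.json below. A minimal sketch using the tokenizers library directly on the committed file:

```python
# Minimal sketch: load tokenizer.json directly and encode a DNA fragment.
# Per the file below, the normalizer prepends "_" and the TemplateProcessing
# post-processor appends [EOS] (id 4) to every single sequence.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")
enc = tok.encode("ACGT")
print(enc.tokens, enc.ids)  # BPE pieces over the A/C/G/T/N k-mer vocabulary
```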
tokenizer.json ADDED
@@ -0,0 +1,2191 @@
+ {
+   "version": "1.0",
+   "truncation": null,
+   "padding": null,
+   "added_tokens": [
+     {
+       "id": 0,
+       "content": "[PAD]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 1,
+       "content": "[UNK]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 2,
+       "content": "[SEP]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 3,
+       "content": "[BOS]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 4,
+       "content": "[EOS]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 5,
+       "content": "[MASK]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     }
+   ],
+   "normalizer": {
+     "type": "Replace",
+     "pattern": {
+       "Regex": "^"
+     },
+     "content": "_"
+   },
+   "pre_tokenizer": {
+     "type": "Whitespace"
+   },
+   "post_processor": {
+     "type": "TemplateProcessing",
+     "single": [
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[EOS]",
+           "type_id": 0
+         }
+       }
+     ],
+     "pair": [
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "B",
+           "type_id": 1
+         }
+       }
+     ],
+     "special_tokens": {
+       "[EOS]": {
+         "id": "[EOS]",
+         "ids": [
+           4
+         ],
+         "tokens": [
+           "[EOS]"
+         ]
+       },
+       "[MASK]": {
+         "id": "[MASK]",
+         "ids": [
+           5
+         ],
+         "tokens": [
+           "[MASK]"
+         ]
+       },
+       "[PAD]": {
+         "id": "[PAD]",
+         "ids": [
+           0
+         ],
+         "tokens": [
+           "[PAD]"
+         ]
+       },
+       "[SEP]": {
+         "id": "[SEP]",
+         "ids": [
+           2
+         ],
+         "tokens": [
+           "[SEP]"
+         ]
+       }
+     }
+   },
+   "decoder": null,
+   "model": {
+     "type": "BPE",
+     "dropout": null,
+     "unk_token": "[UNK]",
+     "continuing_subword_prefix": null,
+     "end_of_word_suffix": null,
+     "fuse_unk": false,
+     "byte_fallback": false,
+     "ignore_merges": false,
+     "vocab": {
+       "[PAD]": 0,
+       "[UNK]": 1,
+       "[SEP]": 2,
+       "[BOS]": 3,
+       "[EOS]": 4,
+       "[MASK]": 5,
+       "_": 6,
+       "A": 7,
+       "C": 8,
+       "G": 9,
+       "T": 10,
+       "N": 11,
+       "TT": 12,
+       "CC": 13,
+       "AA": 14,
+       "GG": 15,
+       "TC": 16,
+       "GC": 17,
+       "TA": 18,
+       "GA": 19,
+       "CA": 20,
+       "TG": 21,
+       "TCC": 22,
+       "TAA": 23,
+       "TCA": 24,
+       "TGG": 25,
+       "TTA": 26,
+       "GCC": 27,
+       "TGC": 28,
+       "TTC": 29,
+       "GAA": 30,
+       "TGA": 31,
+       "GTT": 32,
+       "TAC": 33,
+       "GCA": 34,
+       "CCA": 35,
+       "GGA": 36,
+       "AAA": 37,
+       "GTA": 38,
+       "GGC": 39,
+       "GTC": 40,
+       "GAC": 41,
+       "AAC": 42,
+       "CCC": 43,
+       "CAC": 44,
+       "TAAA": 45,
+       "TTTT": 46,
+       "TCCA": 47,
+       "TTTC": 48,
+       "GTG": 49,
+       "TCAC": 50,
+       "TTCC": 51,
+       "TGCA": 52,
+       "TTAA": 53,
+       "TTCA": 54,
+       "TTAC": 55,
+       "TCCC": 56,
+       "GCCA": 57,
+       "TGGA": 58,
+       "TTGG": 59,
+       "TCAA": 60,
+       "TGAA": 61,
+       "GAAA": 62,
+       "TGGC": 63,
+       "TTTA": 64,
+       "TGCC": 65,
+       "GTAA": 66,
+       "TGAC": 67,
+       "TACC": 68,
+       "GACC": 69,
+       "GTTA": 70,
+       "TACA": 71,
+       "TCTC": 72,
+       "GTTC": 73,
+       "GTCA": 74,
+       "TTGC": 75,
+       "TTGA": 76,
+       "GTAC": 77,
+       "GGAA": 78,
+       "TATC": 79,
+       "GAAC": 80,
+       "GCAA": 81,
+       "GGCA": 82,
+       "TAAC": 83,
+       "TGTC": 84,
+       "GCTC": 85,
+       "GTCC": 86,
+       "GACA": 87,
+       "TAGC": 88,
+       "GGCC": 89,
+       "CCAC": 90,
+       "GTGA": 91,
+       "AAAC": 92,
+       "GCAC": 93,
+       "GTGG": 94,
+       "GCCC": 95,
+       "AAAA": 96,
+       "_A": 97,
+       "TTTCA": 98,
+       "GCTA": 99,
+       "GTGC": 100,
+       "GATA": 101,
+       "TGTA": 102,
+       "TGTT": 103,
+       "GCTT": 104,
+       "CCCA": 105,
+       "TCTT": 106,
+       "GGAC": 107,
+       "TCGC": 108,
+       "GGTA": 109,
+       "TCGA": 110,
+       "TATT": 111,
+       "GGTT": 112,
+       "GATT": 113,
+       "TCCCC": 114,
+       "GAGC": 115,
+       "AACA": 116,
+       "AACC": 117,
+       "GGTC": 118,
+       "GATC": 119,
+       "GGGG": 120,
+       "TTACC": 121,
+       "CACC": 122,
+       "TATA": 123,
+       "GCGG": 124,
+       "GCGC": 125,
+       "TAGA": 126,
+       "CCCC": 127,
+       "GCGA": 128,
+       "TCTCA": 129,
+       "GGGA": 130,
+       "GTCAA": 131,
+       "GTTAC": 132,
+       "TCTA": 133,
+       "TCCAC": 134,
+       "TTTTC": 135,
+       "TCACC": 136,
+       "CATC": 137,
+       "TACAA": 138,
+       "TAGG": 139,
+       "GTAAA": 140,
+       "GAGA": 141,
+       "TTCCA": 142,
+       "TGAAA": 143,
+       "TCCAA": 144,
+       "GTACC": 145,
+       "GGTAC": 146,
+       "TAAAA": 147,
+       "TTCAA": 148,
+       "CCACC": 149,
+       "CACA": 150,
+       "GCACC": 151,
+       "TTTGA": 152,
+       "TGTG": 153,
+       "TTAGC": 154,
+       "GGGC": 155,
+       "GAGTT": 156,
+       "_C": 157,
+       "AAACC": 158,
+       "TGGAA": 159,
+       "TCGTT": 160,
+       "TGCCA": 161,
+       "TGTAA": 162,
+       "TTGTT": 163,
+       "TACCA": 164,
+       "GAGG": 165,
+       "TGCAA": 166,
+       "_TC": 167,
+       "TTTAA": 168,
+       "TAAAC": 169,
+       "GATG": 170,
+       "GAAAA": 171,
+       "TCAAA": 172,
+       "TGGAC": 173,
+       "GCCCA": 174,
+       "TCCCA": 175,
+       "TTTTA": 176,
+       "GCCAC": 177,
+       "TAGTT": 178,
+       "TCGAC": 179,
+       "GTCCA": 180,
+       "TTTGG": 181,
+       "GCGGC": 182,
+       "TTAAA": 183,
+       "TGTTA": 184,
+       "TCGG": 185,
+       "GTAAC": 186,
+       "TGTCA": 187,
+       "AAGC": 188,
+       "TGTTC": 189,
+       "TCTAA": 190,
+       "GCTAC": 191,
+       "TTCGG": 192,
+       "TTGGA": 193,
+       "GCTTTC": 194,
+       "TTACA": 195,
+       "CCAA": 196,
+       "TGCAC": 197,
+       "GAACA": 198,
+       "TCAAC": 199,
+       "TTTAAA": 200,
+       "GGGTT": 201,
+       "TTTCC": 202,
+       "TGAAC": 203,
+       "GCCCC": 204,
+       "TCGCA": 205,
+       "TCGCC": 206,
+       "TTGTA": 207,
+       "TCACA": 208,
+       "GATGA": 209,
+       "TATCC": 210,
+       "TTCAC": 211,
+       "GAAAC": 212,
+       "GCTGA": 213,
+       "GCTTA": 214,
+       "TCTCC": 215,
+       "_TCC": 216,
+       "GCTCC": 217,
+       "GACAA": 218,
+       "TCTTC": 219,
+       "GCGCC": 220,
+       "GGGTA": 221,
+       "TGGCC": 222,
+       "TGTCC": 223,
+       "TTAAC": 224,
+       "TAACC": 225,
+       "GGACC": 226,
+       "TTGTC": 227,
+       "GTTCA": 228,
+       "TTGAC": 229,
+       "GCCCCA": 230,
+       "GTGGC": 231,
+       "GCTTC": 232,
+       "TATTC": 233,
+       "GTTAA": 234,
+       "TTTAC": 235,
+       "GGCAA": 236,
+       "GGTTC": 237,
+       "TAACA": 238,
+       "TGACC": 239,
+       "GTGCA": 240,
+       "TTGAA": 241,
+       "TGCCC": 242,
+       "TGGCA": 243,
+       "TTGCA": 244,
+       "GATGC": 245,
+       "AATA": 246,
+       "TAAGCC": 247,
+       "GCGCA": 248,
+       "TCTAC": 249,
+       "TTAACC": 250,
+       "GAACC": 251,
+       "_AC": 252,
+       "TGCTTC": 253,
+       "TATAA": 254,
+       "TATCA": 255,
+       "TGGGTT": 256,
+       "GTAGGA": 257,
+       "TTAGG": 258,
+       "TGTGG": 259,
+       "TTGCC": 260,
+       "TTTCCC": 261,
+       "TCCCCA": 262,
+       "TACAC": 263,
+       "GACCA": 264,
+       "TACCC": 265,
+       "GTGGA": 266,
+       "TGTGA": 267,
+       "GTCAC": 268,
+       "AACAA": 269,
+       "GCTAA": 270,
+       "AAACA": 271,
+       "TTCCC": 272,
+       "GGTGC": 273,
+       "GATCA": 274,
+       "GATAA": 275,
+       "GGTCC": 276,
+       "GATGG": 277,
+       "GAGCA": 278,
+       "GAGAA": 279,
+       "GATTA": 280,
+       "GGGGA": 281,
+       "GTGAA": 282,
+       "GAGGA": 283,
+       "TTTGC": 284,
+       "TCGAA": 285,
+       "GCACCC": 286,
+       "GTCCC": 287,
+       "GGCCA": 288,
+       "TTTTCC": 289,
+       "GT": 290,
+       "GGGCA": 291,
+       "TCTGC": 292,
+       "GCAAC": 293,
+       "GAGCC": 294,
+       "GGCAC": 295,
+       "TCATCC": 296,
+       "TTTCAC": 297,
+       "TTCACC": 298,
+       "TGACA": 299,
+       "GATCC": 300,
+       "TTTCACC": 301,
+       "TATTA": 302,
+       "GGGCC": 303,
+       "GTTCC": 304,
+       "TGTAC": 305,
+       "GTGCC": 306,
+       "TGCTGCC": 307,
+       "GGTGG": 308,
+       "TCTGG": 309,
+       "TATGC": 310,
+       "GTGAC": 311,
+       "TGCTGG": 312,
+       "GGGAA": 313,
+       "GGAAC": 314,
+       "TCGTC": 315,
+       "GCTG": 316,
+       "TAGGA": 317,
+       "GAGGC": 318,
+       "TATAC": 319,
+       "TAGCC": 320,
+       "GGGGC": 321,
+       "GCGTT": 322,
+       "GGAAA": 323,
+       "GACCC": 324,
+       "GGTAA": 325,
+       "TTCTGA": 326,
+       "TCTTA": 327,
+       "TAGAA": 328,
+       "GGGTC": 329,
+       "GTGTCTCA": 330,
+       "TTGGC": 331,
+       "TCAGC": 332,
+       "CCACA": 333,
+       "TCGGC": 334,
+       "TCCACC": 335,
+       "GCTGC": 336,
+       "GACAC": 337,
+       "GTATTTA": 338,
+       "GCAAA": 339,
+       "TCTGA": 340,
+       "GCCAA": 341,
+       "GGCCC": 342,
+       "_CC": 343,
+       "TTTTCA": 344,
+       "TCCCGTAGGA": 345,
+       "TACTCA": 346,
+       "TTTG": 347,
+       "GATTC": 348,
+       "GTACA": 349,
+       "TATTAA": 350,
+       "TGCTGCCTCCCGTAGGA": 351,
+       "GGTCA": 352,
+       "GCGTC": 353,
+       "TAGTA": 354,
+       "TGAAAA": 355,
+       "GGTGA": 356,
+       "GGGACC": 357,
+       "GGACA": 358,
+       "TCACCC": 359,
+       "TGGACC": 360,
+       "TTCCAA": 361,
+       "CACCA": 362,
+       "GATAC": 363,
+       "TTTCAA": 364,
+       "CCCAA": 365,
+       "TCACGGTAC": 366,
+       "GGAGTT": 367,
+       "GCTGG": 368,
+       "TTAGCA": 369,
+       "GCTCA": 370,
+       "TCGTA": 371,
+       "TTCCCA": 372,
+       "TATTTCAC": 373,
+       "TAGTGA": 374,
+       "CCAGTG": 375,
+       "TTATAC": 376,
+       "TGCTGCCTCCCGTAGGAGTC": 377,
+       "TCTACC": 378,
+       "TTATC": 379,
+       "TAGCA": 380,
+       "TGTGC": 381,
+       "TCCTCC": 382,
+       "TATGA": 383,
+       "TGCTTCTAAGCC": 384,
+       "TATGG": 385,
+       "GGTTGA": 386,
+       "TTTGAGTT": 387,
+       "GCGAC": 388,
+       "TTACCGCGGC": 389,
+       "TCACGAC": 390,
+       "TTTTCACC": 391,
+       "TAGTC": 392,
+       "TTAGCC": 393,
+       "TTTCCA": 394,
+       "GTAGG": 395,
+       "GTTTCC": 396,
+       "TTTCCCTCACGGTAC": 397,
+       "TCTTTT": 398,
+       "TTGGCC": 399,
+       "TTCACA": 400,
+       "GGATCAC": 401,
+       "TCATTA": 402,
+       "GTCAAAC": 403,
+       "GGTTA": 404,
+       "GAGAC": 405,
+       "TTCAAA": 406,
+       "GCAGAC": 407,
+       "GCGAA": 408,
+       "GTACTCCCCA": 409,
+       "GTTTGA": 410,
+       "GAGTA": 411,
+       "GTTGC": 412,
+       "TACCAGGGTA": 413,
+       "TTCCAC": 414,
+       "GGGAC": 415,
+       "GTGGAC": 416,
+       "TGCTTTC": 417,
+       "TGTCTCACGAC": 418,
+       "TCTAATCC": 419,
+       "GCATTC": 420,
+       "GCCTTC": 421,
+       "GGATC": 422,
+       "GCCTCC": 423,
+       "TCAAAA": 424,
+       "TCTG": 425,
+       "TTCAAC": 426,
+       "GGTGCC": 427,
+       "TGCACC": 428,
+       "TAGAC": 429,
+       "TATTTT": 430,
+       "GAGTC": 431,
+       "TTACCGCGGCTGCTGG": 432,
+       "TTTGAA": 433,
+       "GTTAGCC": 434,
+       "TGCTCC": 435,
+       "TTGCCA": 436,
+       "TACACC": 437,
+       "TACCAGGGTATCTAATCC": 438,
+       "TAGCTAA": 439,
+       "TGTTCC": 440,
+       "TTGCAC": 441,
+       "CACAA": 442,
+       "TCTTCC": 443,
+       "GATTCC": 444,
+       "TGGGC": 445,
+       "TTACGCTTTC": 446,
+       "AACATCC": 447,
+       "TACGCA": 448,
+       "TCATC": 449,
+       "GCGTA": 450,
+       "TTCTTC": 451,
+       "TAAAAC": 452,
+       "GCGGA": 453,
+       "TACGGC": 454,
+       "GGGTGG": 455,
+       "TTACGCTTTCTTTAAA": 456,
+       "TTGTCC": 457,
+       "TGTTTT": 458,
+       "TTGTAA": 459,
+       "TTCCTTTGAGTT": 460,
+       "TCCGGA": 461,
+       "TCGGA": 462,
+       "TCCACA": 463,
+       "GCCTTGG": 464,
+       "TGGCCA": 465,
+       "TGCTC": 466,
+       "GACTAA": 467,
+       "_ACC": 468,
+       "TATTCA": 469,
+       "TGAGCC": 470,
+       "TAGGC": 471,
+       "GGTG": 472,
+       "TTTGCC": 473,
+       "GCAGTT": 474,
+       "TTGGGACC": 475,
+       "TACACCA": 476,
+       "TTAGAA": 477,
+       "TCTTGC": 478,
+       "TGAGA": 479,
+       "AAACCA": 480,
+       "GGTTTCA": 481,
+       "TAAACA": 482,
+       "TACTC": 483,
+       "TCAACC": 484,
+       "GTTTC": 485,
+       "GGCGA": 486,
+       "GCTTTT": 487,
+       "_TG": 488,
+       "TTAAAA": 489,
+       "TTGTG": 490,
+       "GAACAA": 491,
+       "TTAACA": 492,
+       "TTGGCA": 493,
+       "GTATTTAGCC": 494,
+       "TATCAC": 495,
+       "TTACCGCGGCTGCTGGCAC": 496,
+       "GTTCAA": 497,
+       "TGCAAAA": 498,
+       "GGTTTC": 499,
+       "TCCCAA": 500,
+       "TCTCGTAC": 501,
+       "TTGTCA": 502,
+       "GCTACC": 503,
+       "TTTACC": 504,
+       "TCCTAC": 505,
+       "TTGGAA": 506,
+       "TCAGAC": 507,
+       "TAATAA": 508,
+       "GAACTGTCTCACGAC": 509,
+       "GATTAAC": 510,
+       "TCTTTC": 511,
+       "TTAGGA": 512,
+       "GCACA": 513,
+       "GGCTTC": 514,
+       "TTGGGTT": 515,
+       "GCAACA": 516,
+       "TGAGC": 517,
+       "TGGTA": 518,
+       "GAACTGTCTCACGACGTTC": 519,
+       "_TCCC": 520,
+       "GTGAAA": 521,
+       "_TCA": 522,
+       "TGGGG": 523,
+       "TTACCC": 524,
+       "GGGAAC": 525,
+       "GGGTTC": 526,
+       "GGATGG": 527,
+       "AACCTCC": 528,
+       "TGGCTGCTTCTAAGCC": 529,
+       "TGGGCC": 530,
+       "TTATTC": 531,
+       "GTATTACCGCGGCTGCTGGCAC": 532,
+       "TTTCACCCC": 533,
+       "AACAAC": 534,
+       "TTTACA": 535,
+       "GTAAAC": 536,
+       "GCCACC": 537,
+       "TTTTAA": 538,
+       "CCCAGCTC": 539,
+       "TTAGATG": 540,
+       "TAAGG": 541,
+       "TATG": 542,
+       "AAACAA": 543,
+       "GTGTCTCAGTT": 544,
+       "GCCCCAGGA": 545,
+       "TGTGTC": 546,
+       "GTCCCA": 547,
+       "GATTAC": 548,
+       "GTCCCC": 549,
+       "TACGCC": 550,
+       "TGGGTTGTT": 551,
+       "TTCGGA": 552,
+       "GAAGAA": 553,
+       "GATAGGGACC": 554,
+       "GTTACA": 555,
+       "TACCAA": 556,
+       "GTTACC": 557,
+       "TACGGA": 558,
+       "GACATCGA": 559,
+       "TGAGCCA": 560,
+       "TACCAGGGTATCTAATCCTGTT": 561,
+       "TATCGG": 562,
+       "TACGA": 563,
+       "TAAGCA": 564,
+       "TCACCAAC": 565,
+       "TGGACCGTGTCTCAGTT": 566,
+       "TAAGTT": 567,
+       "_AA": 568,
+       "GACCTTAGC": 569,
+       "TCTTTA": 570,
+       "TGACAA": 571,
+       "TCAACA": 572,
+       "GCTACA": 573,
+       "TCCGACC": 574,
+       "GAACCA": 575,
+       "GCGGCA": 576,
+       "TCATCA": 577,
+       "AATATTCC": 578,
+       "TCTTTCC": 579,
+       "GTCCAC": 580,
+       "GCCCAA": 581,
+       "GAAGA": 582,
+       "CCACCGGATCAC": 583,
+       "GCGTG": 584,
+       "TTCTCC": 585,
+       "GCAAAA": 586,
+       "GCCTGC": 587,
+       "_TGG": 588,
+       "GTCTC": 589,
+       "TCGCTTTC": 590,
+       "CCACTGCTGCCTCCCGTAGGAGTC": 591,
+       "TATACC": 592,
+       "TATTTA": 593,
+       "GGGGTTC": 594,
+       "GCAGA": 595,
+       "TGCCTTC": 596,
+       "GCAGG": 597,
+       "TTAGTA": 598,
+       "TGGTCC": 599,
+       "GTCACC": 600,
+       "TGATCC": 601,
+       "TGGACCGTGTCTCAGTTCCAGTG": 602,
+       "GCTTTAC": 603,
+       "TGCCAA": 604,
+       "GCCGTC": 605,
+       "TCTTTAAA": 606,
+       "_TCTC": 607,
+       "TGGTC": 608,
+       "GCCGTT": 609,
+       "GCTTCA": 610,
+       "TTCGGC": 611,
+       "TTGAAA": 612,
+       "GTATTCACC": 613,
+       "TAGTG": 614,
+       "GCAGGC": 615,
+       "GTTGTT": 616,
+       "TAAACC": 617,
+       "TACCAC": 618,
+       "TGAGG": 619,
+       "TTTAAC": 620,
+       "TAAAAA": 621,
+       "_CA": 622,
+       "GGAAAA": 623,
+       "TTTTCACCTTTCCCTCACGGTAC": 624,
+       "GGTGGA": 625,
+       "TCCCCCCA": 626,
+       "GTACTCCCCAGGC": 627,
+       "GCAGC": 628,
+       "TTAGA": 629,
+       "TAGCTGTC": 630,
+       "TACGC": 631,
+       "GTTTCA": 632,
+       "GGAGA": 633,
+       "GAATC": 634,
+       "GACCGCCCCA": 635,
+       "TGATCATCC": 636,
+       "GTAGA": 637,
+       "GCTTGTGC": 638,
+       "GTAACA": 639,
+       "GACCAA": 640,
+       "TGTTAA": 641,
+       "TGTTTA": 642,
+       "GCTATTACGCTTTCTTTAAA": 643,
+       "TTATA": 644,
+       "TCGTG": 645,
+       "TTACAA": 646,
+       "TACAAA": 647,
+       "TTATCC": 648,
+       "TTACGCAC": 649,
+       "TCACTTC": 650,
+       "TGCCCA": 651,
+       "GAACTGTCTCACGACGTTCTGAA": 652,
+       "TATTTCACTCCCC": 653,
+       "TGCCAC": 654,
+       "_GG": 655,
+       "GCCGC": 656,
+       "GCCCCC": 657,
+       "TAATAC": 658,
+       "TGATGA": 659,
+       "_TGC": 660,
+       "TTCGA": 661,
+       "TATTCC": 662,
+       "GAAAAA": 663,
+       "CCCTTCCA": 664,
+       "GGCACC": 665,
+       "TGTTGC": 666,
+       "GAAACA": 667,
+       "TATAAA": 668,
+       "TCATAA": 669,
+       "TTGTGC": 670,
+       "TATTTC": 671,
+       "TGCACA": 672,
+       "GTCAATTCCTTTGAGTT": 673,
+       "GACCGCCCCAGTCAAAC": 674,
+       "TTGTAC": 675,
+       "GCTTGC": 676,
+       "GTAAAA": 677,
+       "TATCAA": 678,
+       "GACAAA": 679,
+       "GACATCGAGGTGCC": 680,
+       "TGCTCA": 681,
+       "GTATCA": 682,
+       "GGGAACGTATTCACC": 683,
+       "TCTCAA": 684,
+       "TTAACCA": 685,
+       "GCTCAC": 686,
+       "TCGGTA": 687,
+       "TATGCC": 688,
+       "GAAACC": 689,
+       "TCCTC": 690,
+       "TTGACC": 691,
+       "CACTGC": 692,
+       "GTATGC": 693,
+       "CCACGCTTTC": 694,
+       "GACTCGCTTTC": 695,
+       "TCAGGC": 696,
+       "GCTCCA": 697,
+       "GACTAACCC": 698,
+       "TGGCAA": 699,
+       "TCAGGA": 700,
+       "GCCAAA": 701,
+       "GCAGCA": 702,
+       "TTTTATCC": 703,
+       "TGGAAA": 704,
+       "TGTGAC": 705,
+       "TTGTGG": 706,
+       "GTCGAGTT": 707,
+       "GGACGTTA": 708,
+       "GGTTCC": 709,
+       "TGGGA": 710,
+       "TTATTA": 711,
+       "GAACCACCGGATCAC": 712,
+       "GAAGTT": 713,
+       "TCCAAA": 714,
+       "GCTTTA": 715,
+       "CCCAAC": 716,
+       "TCTGAC": 717,
+       "GGCCGAC": 718,
+       "TCAGA": 719,
+       "TAGGCA": 720,
+       "TCGCTACTCA": 721,
+       "TGTCAA": 722,
+       "TTGCAA": 723,
+       "GAACTGTCTCACGACGTTCTGAACCCAGCTC": 724,
+       "TAGGAA": 725,
+       "GCTGAA": 726,
+       "TGAGCCATTACC": 727,
+       "TGGACA": 728,
+       "GGTTTT": 729,
+       "TGTTTC": 730,
+       "TTGTTC": 731,
+       "CCAGTGA": 732,
+       "TTCTCCA": 733,
+       "TCTTGA": 734,
+       "TGGCGAACA": 735,
+       "TTTTCAAC": 736,
+       "GTGTGTA": 737,
+       "TAATCA": 738,
+       "CACTATC": 739,
+       "GTTTTA": 740,
+       "GTCGAGTTGCAGAC": 741,
+       "GTAACC": 742,
+       "TTTGAC": 743,
+       "TCATTATGCAAAA": 744,
+       "GTCATCC": 745,
+       "TGATC": 746,
+       "TACCTCCA": 747,
+       "TGGCGAACAGCCA": 748,
+       "GAAGC": 749,
+       "GAAGG": 750,
+       "GCTCGCC": 751,
+       "CATCGTT": 752,
+       "GGAATA": 753,
+       "TTTGCA": 754,
+       "CATCTTCC": 755,
+       "TGCGA": 756,
+       "TTGACA": 757,
+       "TACCCC": 758,
+       "TACACA": 759,
+       "GTTGAGC": 760,
+       "TGTGAA": 761,
+       "TGGTTC": 762,
+       "TGCTGA": 763,
+       "TGCATGC": 764,
+       "GCATAC": 765,
+       "TAACAA": 766,
+       "GACGG": 767,
+       "TGTAAA": 768,
+       "TCCCGAA": 769,
+       "TGGCGAACAGCCATACCC": 770,
+       "GCACTTCTGA": 771,
+       "GTACAA": 772,
+       "TAAGTC": 773,
+       "GTCAGTA": 774,
+       "TTTTAC": 775,
+       "TTCTA": 776,
+       "GCCGCC": 777,
+       "GCTATC": 778,
+       "TGAGCCATTACCTCACCAAC": 779,
+       "GGCCCC": 780,
+       "GATAAA": 781,
+       "TATACA": 782,
+       "TCGACTAGTGA": 783,
+       "TACTTTC": 784,
+       "GGTGAA": 785,
+       "TGTTCA": 786,
+       "CATCTTCCGCGCA": 787,
+       "_TGCC": 788,
+       "GATCTC": 789,
+       "GTGGAA": 790,
+       "TGTCCA": 791,
+       "GACACC": 792,
+       "GGTCTGGGTTGTT": 793,
+       "TTACCA": 794,
+       "GCCGGC": 795,
+       "GAAGGC": 796,
+       "TGCAAC": 797,
+       "GCTCCCC": 798,
+       "TGGCTGC": 799,
+       "GCCTCCGTTAC": 800,
+       "GCTCCC": 801,
+       "TCCGAA": 802,
+       "GTCAAA": 803,
+       "GGAGTTAGCC": 804,
+       "GAACCC": 805,
+       "TTACCAA": 806,
+       "_GCC": 807,
+       "GCTCGAC": 808,
+       "GAGTTC": 809,
+       "TGACTGATCATCC": 810,
+       "GACTTAA": 811,
+       "GGTACA": 812,
+       "GATCAA": 813,
+       "GGTCCTCTCGTAC": 814,
+       "AACTTC": 815,
+       "GATGGC": 816,
+       "TTTCACCCCTA": 817,
+       "GTTTCCCAC": 818,
+       "GGAACC": 819,
+       "TAATCC": 820,
+       "TTTTCGCC": 821,
+       "TGGCGAACAGCCATACCCTTGGGACC": 822,
+       "TAGCGATTCC": 823,
+       "GTCCAA": 824,
+       "GCAGCC": 825,
+       "CCAGTA": 826,
+       "GTGTCA": 827,
+       "TTTCACA": 828,
+       "GGAGGC": 829,
+       "TAATC": 830,
+       "TATATC": 831,
+       "TAATTC": 832,
+       "GTGGACTACCAGGGTATCTAATCCTGTT": 833,
+       "TGTTGG": 834,
+       "TGGTTTCA": 835,
+       "GTGGCA": 836,
+       "GATTTT": 837,
+       "GTGCAA": 838,
+       "TGCTGC": 839,
+       "TGAACC": 840,
+       "TTACGCACTCTTTAAA": 841,
+       "GGACGTTAGCACCC": 842,
+       "TCTTCA": 843,
+       "GCTTTTC": 844,
+       "CCACTGCTGCCTCCCGTAGGAGTCTGGACCGTGTCTCAGTTCCAGTG": 845,
+       "GAAGAC": 846,
+       "GCATTCGCACTTCTGA": 847,
+       "GTAGC": 848,
+       "GGGAAA": 849,
+       "TCCACAA": 850,
+       "GGCCAA": 851,
+       "TGGGGC": 852,
+       "GTGCCA": 853,
+       "TGAACA": 854,
+       "GCTGGC": 855,
+       "_GTA": 856,
+       "GGTCTGGGTTGTTTCCC": 857,
+       "TTAGATGTTTCA": 858,
+       "GCTTGCACCC": 859,
+       "TCCCCC": 860,
+       "GGAATTTC": 861,
+       "TTATACAAAA": 862,
+       "GTATTACCGCGGCTGCTGGCACGGAGTTAGCC": 863,
+       "GCCACA": 864,
+       "GATTTC": 865,
+       "GGACCA": 866,
+       "GTCACA": 867,
+       "CCCACC": 868,
+       "GGTCTC": 869,
+       "GGCTC": 870,
+       "TGTGTCGGTT": 871,
+       "TTTGGGACCTTAGC": 872,
+       "CACGAC": 873,
+       "TCGGTT": 874,
+       "TGTGTT": 875,
+       "GGCTCATTATGCAAAA": 876,
+       "TGCAAA": 877,
+       "GTTTA": 878,
+       "GTAATTCC": 879,
+       "TGGTTCAC": 880,
+       "GTACCTTTTATCC": 881,
+       "GCGTTC": 882,
+       "GAGAAC": 883,
+       "TAAGTA": 884,
+       "TACCCACCA": 885,
+       "TCAGCC": 886,
+       "GGAAACC": 887,
+       "TACTTA": 888,
+       "TAACCA": 889,
+       "CACTTC": 890,
+       "TGCTAA": 891,
+       "_GC": 892,
+       "TTCGTAC": 893,
+       "TCAAAC": 894,
+       "TGCTCGAC": 895,
+       "TGAATGA": 896,
+       "TCTCAC": 897,
+       "GACTAC": 898,
+       "TTAGATA": 899,
+       "GGGGTTCTTTTCGCC": 900,
+       "TACCCA": 901,
+       "TCGGTATTCC": 902,
+       "GTCTA": 903,
+       "CCACAA": 904,
+       "TCCGG": 905,
+       "TAGGGC": 906,
+       "TAAGA": 907,
+       "GTCTCGCA": 908,
+       "GCTACAC": 909,
+       "TTTGGA": 910,
+       "TTAGTC": 911,
+       "TCCACCGCTTGTGC": 912,
+       "TTTACAA": 913,
+       "GGCAAA": 914,
+       "GTTATA": 915,
+       "TTAAAC": 916,
+       "TGTCAC": 917,
+       "TATATT": 918,
+       "GTATTTAGCCTTGGA": 919,
+       "GTCAAC": 920,
+       "GGGCCA": 921,
+       "TCATTC": 922,
+       "TGGCTGCTTCTAAGCCAACCTCC": 923,
+       "GTCGA": 924,
+       "GCTGACCCA": 925,
+       "TTTGGC": 926,
+       "GGGCGG": 927,
+       "TGTTGA": 928,
+       "TTCCCC": 929,
+       "TTTCGG": 930,
+       "GTTCCC": 931,
+       "TGATGG": 932,
+       "TGTAAC": 933,
+       "TATGTA": 934,
+       "GGATCA": 935,
+       "GTTAAA": 936,
+       "GGAACA": 937,
+       "TTATCCA": 938,
+       "GGCGG": 939,
+       "TGTCTC": 940,
+       "_GA": 941,
+       "TAGAAC": 942,
+       "GCTTTACGCCCA": 943,
+       "TACTGA": 944,
+       "GCAACC": 945,
+       "TTATACAAAAGGTAC": 946,
+       "TGAGTC": 947,
+       "GAGCTGAC": 948,
+       "GTTGA": 949,
+       "TGGAAC": 950,
+       "TTGGAC": 951,
+       "TGAAAC": 952,
+       "TCTCAAACCA": 953,
+       "GTAGGAAACC": 954,
+       "TTATGA": 955,
+       "TGTACA": 956,
+       "TTCTGC": 957,
+       "GTCAAAA": 958,
+       "AACACC": 959,
+       "TTCTCACC": 960,
+       "TTATAA": 961,
+       "GGATA": 962,
+       "TTATTCA": 963,
+       "TTACCCC": 964,
+       "GCATCA": 965,
+       "TTCGTGCA": 966,
+       "GTTTTC": 967,
+       "TAAATCA": 968,
+       "TGCTCCCCACGCTTTC": 969,
+       "GTGACA": 970,
+       "TGCCCC": 971,
+       "GGCCCA": 972,
+       "GCCCAC": 973,
+       "TATAGC": 974,
+       "GGCGGC": 975,
+       "AACTTCA": 976,
+       "TATGACC": 977,
+       "TCACAA": 978,
+       "GTGCTCTACC": 979,
+       "GCATGA": 980,
+       "GAATAA": 981,
+       "TACTGC": 982,
+       "GTCGG": 983,
+       "GCTTAC": 984,
+       "TATCCA": 985,
+       "GCTCGCCGCTAC": 986,
+       "TCGACTAGTGAGCTATTACGCTTTCTTTAAA": 987,
+       "TGTCCC": 988,
+       "GACCCC": 989,
+       "TGAGTA": 990,
+       "GGGGTTCTTTTCGCCTTTCCCTCACGGTAC": 991,
+       "TTTGTAA": 992,
+       "GAATCA": 993,
+       "GCACAA": 994,
+       "GAATTC": 995,
+       "GTTTGATTGGCC": 996,
+       "TTCCAAGCC": 997,
+       "CCAGCTA": 998,
+       "TGTCTCCC": 999,
+       "GTTGAGCGATGG": 1000,
+       "TCCAAC": 1001,
+       "GAACCCC": 1002,
+       "TGACGAGCA": 1003,
+       "TGAATGGCTGC": 1004,
+       "GTTACATCTTCCGCGCA": 1005,
+       "TCTCAGACCA": 1006,
+       "GGATGGCTGCTTCTAAGCCAACCTCC": 1007,
+       "TGTTAC": 1008,
+       "GACTCGCTTTCGCTAC": 1009,
+       "TGGGAC": 1010,
+       "GGTACC": 1011,
+       "GGAGC": 1012,
+       "GGGCCCCC": 1013,
+       "TAAACAA": 1014,
+       "GTCGCC": 1015,
+       "GAGGAA": 1016,
+       "GACTTTC": 1017,
+       "TTTTTGA": 1018,
+       "AAAGTT": 1019,
+       "AAAACC": 1020,
+       "TAGAAA": 1021,
+       "GGTTAA": 1022,
+       "GCGCAA": 1023
+     },
+     "merges": [
+       "T T",
+       "C C",
+       "A A",
+       "G G",
+       "T C",
+       "G C",
+       "T A",
+       "G A",
+       "C A",
+       "T G",
+       "T CC",
+       "T AA",
+       "TC A",
+       "T GG",
+       "TT A",
+       "G CC",
+       "T GC",
+       "TT C",
+       "G AA",
+       "T GA",
+       "G TT",
+       "TA C",
+       "GC A",
+       "CC A",
+       "GG A",
+       "AA A",
+       "G TA",
+       "GG C",
+       "G TC",
+       "GA C",
+       "AA C",
+       "CC C",
+       "CA C",
+       "TAA A",
+       "TT TT",
+       "TCC A",
+       "TT TC",
+       "G TG",
+       "TCA C",
+       "TT CC",
+       "TGC A",
+       "TT AA",
+       "TT CA",
+       "TTA C",
+       "TCC C",
+       "GCC A",
+       "TGG A",
+       "TT GG",
+       "TC AA",
+       "TG AA",
+       "GAA A",
+       "TGG C",
+       "TT TA",
+       "TG CC",
+       "G TAA",
+       "TGA C",
+       "TA CC",
+       "GA CC",
+       "G TTA",
+       "TA CA",
+       "TC TC",
+       "G TTC",
+       "G TCA",
+       "TT GC",
+       "TT GA",
+       "G TAC",
+       "GG AA",
+       "TA TC",
+       "GAA C",
+       "GC AA",
+       "GG CA",
+       "TAA C",
+       "TG TC",
+       "GC TC",
+       "G TCC",
+       "GA CA",
+       "TA GC",
+       "GG CC",
+       "CCA C",
+       "G TGA",
+       "AAA C",
+       "GCA C",
+       "G TGG",
+       "GCC C",
+       "AA AA",
+       "_ A",
+       "TT TCA",
+       "GC TA",
+       "G TGC",
+       "GA TA",
+       "TG TA",
+       "TG TT",
+       "GC TT",
+       "CC CA",
+       "TC TT",
+       "GGA C",
+       "TC GC",
+       "GG TA",
+       "TC GA",
+       "TA TT",
+       "GG TT",
+       "GA TT",
+       "TCC CC",
+       "GA GC",
+       "AA CA",
+       "AA CC",
+       "GG TC",
+       "GA TC",
+       "GG GG",
+       "TTA CC",
+       "CA CC",
+       "TA TA",
+       "GC GG",
+       "GC GC",
+       "TA GA",
+       "CC CC",
+       "GC GA",
+       "TC TCA",
+       "GG GA",
+       "GTC AA",
+       "G TTAC",
+       "TC TA",
+       "TCCA C",
+       "TT TTC",
+       "TCA CC",
+       "CA TC",
+       "TAC AA",
+       "TA GG",
+       "G TAAA",
+       "GA GA",
+       "TT CCA",
+       "TG AAA",
+       "TCC AA",
+       "GTA CC",
+       "GG TAC",
+       "TAA AA",
+       "TTC AA",
+       "CCA CC",
+       "CA CA",
+       "GCA CC",
+       "TT TGA",
+       "TG TG",
+       "TTA GC",
+       "GG GC",
+       "GA GTT",
+       "_ C",
+       "AAA CC",
+       "TGG AA",
+       "TC GTT",
+       "TG CCA",
+       "TG TAA",
+       "TT GTT",
+       "TA CCA",
+       "GA GG",
+       "TGC AA",
+       "_ TC",
+       "TT TAA",
+       "TAAA C",
+       "GA TG",
+       "GAA AA",
+       "TC AAA",
+       "TGGA C",
+       "GCC CA",
+       "TCC CA",
+       "TT TTA",
+       "GCCA C",
+       "TA GTT",
+       "TC GAC",
+       "G TCCA",
+       "TT TGG",
+       "GC GGC",
+       "TT AAA",
+       "TG TTA",
+       "TC GG",
+       "GTAA C",
+       "TG TCA",
+       "AA GC",
+       "TG TTC",
+       "TC TAA",
+       "GC TAC",
+       "TTC GG",
+       "TT GGA",
+       "GC TTTC",
+       "TTA CA",
+       "CC AA",
+       "TGCA C",
+       "GAA CA",
+       "TC AAC",
+       "TT TAAA",
+       "GG GTT",
+       "TT TCC",
+       "TG AAC",
+       "GCC CC",
+       "TC GCA",
+       "TC GCC",
+       "TT GTA",
+       "TCA CA",
+       "GA TGA",
+       "TA TCC",
+       "TT CAC",
+       "GAAA C",
+       "GC TGA",
+       "GC TTA",
+       "TC TCC",
+       "_ TCC",
+       "GC TCC",
+       "GAC AA",
+       "TC TTC",
+       "GC GCC",
+       "GG GTA",
1387
+ "TGG CC",
1388
+ "TG TCC",
1389
+ "TT AAC",
1390
+ "TAA CC",
1391
+ "GGA CC",
1392
+ "TT GTC",
1393
+ "GTT CA",
1394
+ "TT GAC",
1395
+ "GCC CCA",
1396
+ "G TGGC",
1397
+ "GC TTC",
1398
+ "TA TTC",
1399
+ "GTT AA",
1400
+ "TT TAC",
1401
+ "GGC AA",
1402
+ "GG TTC",
1403
+ "TAA CA",
1404
+ "TGA CC",
1405
+ "G TGCA",
1406
+ "TT GAA",
1407
+ "TG CCC",
1408
+ "TGG CA",
1409
+ "TT GCA",
1410
+ "GA TGC",
1411
+ "AA TA",
1412
+ "TAA GCC",
1413
+ "GC GCA",
1414
+ "TC TAC",
1415
+ "TTAA CC",
1416
+ "GAA CC",
1417
+ "_A C",
1418
+ "TGC TTC",
1419
+ "TA TAA",
1420
+ "TA TCA",
1421
+ "TGG GTT",
1422
+ "GTA GGA",
1423
+ "TTA GG",
1424
+ "TG TGG",
1425
+ "TT GCC",
1426
+ "TT TCCC",
1427
+ "TCC CCA",
1428
+ "TA CAC",
1429
+ "GA CCA",
1430
+ "TA CCC",
1431
+ "G TGGA",
1432
+ "TG TGA",
1433
+ "G TCAC",
1434
+ "AAC AA",
1435
+ "GC TAA",
1436
+ "AAA CA",
1437
+ "TT CCC",
1438
+ "GG TGC",
1439
+ "GA TCA",
1440
+ "GA TAA",
1441
+ "GG TCC",
1442
+ "GA TGG",
1443
+ "GA GCA",
1444
+ "GA GAA",
1445
+ "GA TTA",
1446
+ "GG GGA",
1447
+ "GTG AA",
1448
+ "GA GGA",
1449
+ "TT TGC",
1450
+ "TC GAA",
1451
+ "GCA CCC",
1452
+ "G TCCC",
1453
+ "GG CCA",
1454
+ "TTTT CC",
1455
+ "G T",
1456
+ "GG GCA",
1457
+ "TC TGC",
1458
+ "GC AAC",
1459
+ "GA GCC",
1460
+ "GG CAC",
1461
+ "TCA TCC",
1462
+ "TT TCAC",
1463
+ "TTCA CC",
1464
+ "TGA CA",
1465
+ "GA TCC",
1466
+ "TTTCA CC",
1467
+ "TA TTA",
1468
+ "GG GCC",
1469
+ "GTT CC",
1470
+ "TG TAC",
1471
+ "GTG CC",
1472
+ "TGC TGCC",
1473
+ "GG TGG",
1474
+ "TC TGG",
1475
+ "TA TGC",
1476
+ "G TGAC",
1477
+ "TGC TGG",
1478
+ "GG GAA",
1479
+ "GG AAC",
1480
+ "TC GTC",
1481
+ "GC TG",
1482
+ "TA GGA",
1483
+ "GA GGC",
1484
+ "TA TAC",
1485
+ "TA GCC",
1486
+ "GG GGC",
1487
+ "GC GTT",
1488
+ "GG AAA",
1489
+ "GA CCC",
1490
+ "GG TAA",
1491
+ "TTC TGA",
1492
+ "TC TTA",
1493
+ "TA GAA",
1494
+ "GG GTC",
1495
+ "GTG TCTCA",
1496
+ "TT GGC",
1497
+ "TCA GC",
1498
+ "CCA CA",
1499
+ "TC GGC",
1500
+ "TCCA CC",
1501
+ "GC TGC",
1502
+ "GA CAC",
1503
+ "GTA TTTA",
1504
+ "GC AAA",
1505
+ "TC TGA",
1506
+ "GCC AA",
1507
+ "GG CCC",
1508
+ "_ CC",
1509
+ "TTTT CA",
1510
+ "TCCC GTAGGA",
1511
+ "TAC TCA",
1512
+ "TT TG",
1513
+ "GA TTC",
1514
+ "GTA CA",
1515
+ "TA TTAA",
1516
+ "TGCTGCC TCCCGTAGGA",
1517
+ "GG TCA",
1518
+ "GC GTC",
1519
+ "TA GTA",
1520
+ "TGAA AA",
1521
+ "GG TGA",
1522
+ "GG GACC",
1523
+ "GGA CA",
1524
+ "TCA CCC",
1525
+ "TGGA CC",
1526
+ "TTCC AA",
1527
+ "CA CCA",
1528
+ "GA TAC",
1529
+ "TTTC AA",
1530
+ "CCC AA",
1531
+ "TCAC GGTAC",
1532
+ "GGA GTT",
1533
+ "GC TGG",
1534
+ "TTA GCA",
1535
+ "GC TCA",
1536
+ "TC GTA",
1537
+ "TTCC CA",
1538
+ "TATT TCAC",
1539
+ "TA GTGA",
1540
+ "CCA GTG",
1541
+ "TTA TAC",
1542
+ "TGCTGCCTCCCGTAGGA GTC",
1543
+ "TC TACC",
1544
+ "TTA TC",
1545
+ "TA GCA",
1546
+ "TG TGC",
1547
+ "TCC TCC",
1548
+ "TA TGA",
1549
+ "TGCTTC TAAGCC",
1550
+ "TA TGG",
1551
+ "GG TTGA",
1552
+ "TTTGA GTT",
1553
+ "GC GAC",
1554
+ "TTACC GCGGC",
1555
+ "TCAC GAC",
1556
+ "TTTT CACC",
1557
+ "TA GTC",
1558
+ "TTA GCC",
1559
+ "TT TCCA",
1560
+ "GTA GG",
1561
+ "GTT TCC",
1562
+ "TTTCCC TCACGGTAC",
1563
+ "TC TTTT",
1564
+ "TTGG CC",
1565
+ "TTCA CA",
1566
+ "GGA TCAC",
1567
+ "TCA TTA",
1568
+ "GTC AAAC",
1569
+ "GG TTA",
1570
+ "GA GAC",
1571
+ "TTC AAA",
1572
+ "GCA GAC",
1573
+ "GC GAA",
1574
+ "GTAC TCCCCA",
1575
+ "GTT TGA",
1576
+ "GA GTA",
1577
+ "GTT GC",
1578
+ "TACCA GGGTA",
1579
+ "TT CCAC",
1580
+ "GG GAC",
1581
+ "G TGGAC",
1582
+ "TGC TTTC",
1583
+ "TGTC TCACGAC",
1584
+ "TCTAA TCC",
1585
+ "GCA TTC",
1586
+ "GCC TTC",
1587
+ "GGA TC",
1588
+ "GCC TCC",
1589
+ "TCAA AA",
1590
+ "TC TG",
1591
+ "TTC AAC",
1592
+ "GG TGCC",
1593
+ "TGCA CC",
1594
+ "TA GAC",
1595
+ "TA TTTT",
1596
+ "GA GTC",
1597
+ "TTACCGCGGC TGCTGG",
1598
+ "TT TGAA",
1599
+ "GTTA GCC",
1600
+ "TGC TCC",
1601
+ "TT GCCA",
1602
+ "TACA CC",
1603
+ "TACCAGGGTA TCTAATCC",
1604
+ "TAGC TAA",
1605
+ "TG TTCC",
1606
+ "TT GCAC",
1607
+ "CAC AA",
1608
+ "TC TTCC",
1609
+ "GA TTCC",
1610
+ "TGG GC",
1611
+ "TTAC GCTTTC",
1612
+ "AACA TCC",
1613
+ "TAC GCA",
1614
+ "TCA TC",
1615
+ "GC GTA",
1616
+ "TTC TTC",
1617
+ "TAA AAC",
1618
+ "GC GGA",
1619
+ "TAC GGC",
1620
+ "GG GTGG",
1621
+ "TTACGCTTTC TTTAAA",
1622
+ "TT GTCC",
1623
+ "TG TTTT",
1624
+ "TT GTAA",
1625
+ "TTCC TTTGAGTT",
1626
+ "TCC GGA",
1627
+ "TC GGA",
1628
+ "TCCA CA",
1629
+ "GCC TTGG",
1630
+ "TGG CCA",
1631
+ "TGC TC",
1632
+ "GAC TAA",
1633
+ "_A CC",
1634
+ "TA TTCA",
1635
+ "TGA GCC",
1636
+ "TA GGC",
1637
+ "GG TG",
1638
+ "TT TGCC",
1639
+ "GCA GTT",
1640
+ "TTGG GACC",
1641
+ "TACA CCA",
1642
+ "TTA GAA",
1643
+ "TC TTGC",
1644
+ "TGA GA",
1645
+ "AAA CCA",
1646
+ "GG TTTCA",
1647
+ "TAAA CA",
1648
+ "TAC TC",
1649
+ "TCAA CC",
1650
+ "GTT TC",
1651
+ "GGC GA",
1652
+ "GC TTTT",
1653
+ "_ TG",
1654
+ "TTAA AA",
1655
+ "TT GTG",
1656
+ "GAAC AA",
1657
+ "TTAA CA",
1658
+ "TTGG CA",
1659
+ "GTATTTA GCC",
1660
+ "TA TCAC",
1661
+ "TTACCGCGGCTGCTGG CAC",
1662
+ "GTTC AA",
1663
+ "TGC AAAA",
1664
+ "GG TTTC",
1665
+ "TCCC AA",
1666
+ "TCTC GTAC",
1667
+ "TT GTCA",
1668
+ "GC TACC",
1669
+ "TTTA CC",
1670
+ "TCC TAC",
1671
+ "TTGG AA",
1672
+ "TCA GAC",
1673
+ "TAA TAA",
1674
+ "GAAC TGTCTCACGAC",
1675
+ "GATT AAC",
1676
+ "TC TTTC",
1677
+ "TTA GGA",
1678
+ "GCA CA",
1679
+ "GGC TTC",
1680
+ "TTGG GTT",
1681
+ "GCAA CA",
1682
+ "TGA GC",
1683
+ "TGG TA",
1684
+ "GAACTGTCTCACGAC GTTC",
1685
+ "_ TCCC",
1686
+ "GTG AAA",
1687
+ "_ TCA",
1688
+ "TGG GG",
1689
+ "TTA CCC",
1690
+ "GG GAAC",
1691
+ "GG GTTC",
1692
+ "GGA TGG",
1693
+ "AACC TCC",
1694
+ "TGGC TGCTTCTAAGCC",
1695
+ "TGG GCC",
1696
+ "TTA TTC",
1697
+ "GTA TTACCGCGGCTGCTGGCAC",
1698
+ "TTTCA CCCC",
1699
+ "AAC AAC",
1700
+ "TTTA CA",
1701
+ "GTAAA C",
1702
+ "GCCA CC",
1703
+ "TTTT AA",
1704
+ "CCCA GCTC",
1705
+ "TTA GATG",
1706
+ "TAA GG",
1707
+ "TA TG",
1708
+ "AAAC AA",
1709
+ "GTGTCTCA GTT",
1710
+ "GCCCCA GGA",
1711
+ "TG TGTC",
1712
+ "GTCC CA",
1713
+ "GA TTAC",
1714
+ "GTCC CC",
1715
+ "TAC GCC",
1716
+ "TGGGTT GTT",
1717
+ "TTC GGA",
1718
+ "GAA GAA",
1719
+ "GATA GGGACC",
1720
+ "GTTA CA",
1721
+ "TACC AA",
1722
+ "GTTA CC",
1723
+ "TAC GGA",
1724
+ "GACA TCGA",
1725
+ "TGA GCCA",
1726
+ "TACCAGGGTATCTAATCC TGTT",
1727
+ "TATC GG",
1728
+ "TAC GA",
1729
+ "TAA GCA",
1730
+ "TCACC AAC",
1731
+ "TGGACC GTGTCTCAGTT",
1732
+ "TAA GTT",
1733
+ "_ AA",
1734
+ "GACC TTAGC",
1735
+ "TC TTTA",
1736
+ "TGAC AA",
1737
+ "TCAA CA",
1738
+ "GC TACA",
1739
+ "TCC GACC",
1740
+ "GAA CCA",
1741
+ "GC GGCA",
1742
+ "TCA TCA",
1743
+ "AATA TTCC",
1744
+ "TCTT TCC",
1745
+ "G TCCAC",
1746
+ "GCCC AA",
1747
+ "GAA GA",
1748
+ "CCACC GGATCAC",
1749
+ "GC GTG",
1750
+ "TTC TCC",
1751
+ "GCAA AA",
1752
+ "GCC TGC",
1753
+ "_ TGG",
1754
+ "GTC TC",
1755
+ "TCGC TTTC",
1756
+ "CCAC TGCTGCCTCCCGTAGGAGTC",
1757
+ "TA TACC",
1758
+ "TA TTTA",
1759
+ "GGGG TTC",
1760
+ "GCA GA",
1761
+ "TGCC TTC",
1762
+ "GCA GG",
1763
+ "TTA GTA",
1764
+ "TGG TCC",
1765
+ "GTCA CC",
1766
+ "TGA TCC",
1767
+ "TGGACCGTGTCTCAGTT CCAGTG",
1768
+ "GCTT TAC",
1769
+ "TGCC AA",
1770
+ "GCC GTC",
1771
+ "TCTT TAAA",
1772
+ "_ TCTC",
1773
+ "TGG TC",
1774
+ "GCC GTT",
1775
+ "GC TTCA",
1776
+ "TTC GGC",
1777
+ "TT GAAA",
1778
+ "GTA TTCACC",
1779
+ "TA GTG",
1780
+ "GCA GGC",
1781
+ "GTT GTT",
1782
+ "TAAA CC",
1783
+ "TA CCAC",
1784
+ "TGA GG",
1785
+ "TT TAAC",
1786
+ "TAA AAA",
1787
+ "_ CA",
1788
+ "GGAA AA",
1789
+ "TTTTCACC TTTCCCTCACGGTAC",
1790
+ "GG TGGA",
1791
+ "TCCCC CCA",
1792
+ "GTACTCCCCA GGC",
1793
+ "GCA GC",
1794
+ "TTA GA",
1795
+ "TAGC TGTC",
1796
+ "TAC GC",
1797
+ "GTT TCA",
1798
+ "GGA GA",
1799
+ "GAA TC",
1800
+ "GACC GCCCCA",
1801
+ "TGA TCATCC",
1802
+ "GTA GA",
1803
+ "GCTT GTGC",
1804
+ "GTAA CA",
1805
+ "GACC AA",
1806
+ "TG TTAA",
1807
+ "TG TTTA",
1808
+ "GCTA TTACGCTTTCTTTAAA",
1809
+ "TTA TA",
1810
+ "TC GTG",
1811
+ "TTAC AA",
1812
+ "TAC AAA",
1813
+ "TTA TCC",
1814
+ "TTAC GCAC",
1815
+ "TCAC TTC",
1816
+ "TGCC CA",
1817
+ "GAACTGTCTCACGACGTTC TGAA",
1818
+ "TATTTCAC TCCCC",
1819
+ "TG CCAC",
1820
+ "_ GG",
1821
+ "GCC GC",
1822
+ "GCC CCC",
1823
+ "TAA TAC",
1824
+ "TGA TGA",
1825
+ "_ TGC",
1826
+ "TTC GA",
1827
+ "TA TTCC",
1828
+ "GAA AAA",
1829
+ "CCC TTCCA",
1830
+ "GGCA CC",
1831
+ "TG TTGC",
1832
+ "GAAA CA",
1833
+ "TA TAAA",
1834
+ "TCA TAA",
1835
+ "TT GTGC",
1836
+ "TA TTTC",
1837
+ "TGCA CA",
1838
+ "GTCAA TTCCTTTGAGTT",
1839
+ "GACCGCCCCA GTCAAAC",
1840
+ "TT GTAC",
1841
+ "GC TTGC",
1842
+ "GTAA AA",
1843
+ "TA TCAA",
1844
+ "GAC AAA",
1845
+ "GACATCGA GGTGCC",
1846
+ "TGC TCA",
1847
+ "GTA TCA",
1848
+ "GGGAAC GTATTCACC",
1849
+ "TC TCAA",
1850
+ "TTAA CCA",
1851
+ "GC TCAC",
1852
+ "TC GGTA",
1853
+ "TA TGCC",
1854
+ "GAAA CC",
1855
+ "TCC TC",
1856
+ "TT GACC",
1857
+ "CAC TGC",
1858
+ "GTA TGC",
1859
+ "CCAC GCTTTC",
1860
+ "GAC TCGCTTTC",
1861
+ "TCA GGC",
1862
+ "GC TCCA",
1863
+ "GACTAA CCC",
1864
+ "TGGC AA",
1865
+ "TCA GGA",
1866
+ "GCC AAA",
1867
+ "GCA GCA",
1868
+ "TTTTA TCC",
1869
+ "TGG AAA",
1870
+ "TG TGAC",
1871
+ "TT GTGG",
1872
+ "GTC GAGTT",
1873
+ "GGAC GTTA",
1874
+ "GG TTCC",
1875
+ "TGG GA",
1876
+ "TTA TTA",
1877
+ "GAA CCACCGGATCAC",
1878
+ "GAA GTT",
1879
+ "TCC AAA",
1880
+ "GC TTTA",
1881
+ "CCC AAC",
1882
+ "TC TGAC",
1883
+ "GGCC GAC",
1884
+ "TCA GA",
1885
+ "TA GGCA",
1886
+ "TCGC TACTCA",
1887
+ "TG TCAA",
1888
+ "TTGC AA",
1889
+ "GAACTGTCTCACGACGTTCTGAA CCCAGCTC",
1890
+ "TA GGAA",
1891
+ "GC TGAA",
1892
+ "TGAGCCA TTACC",
1893
+ "TGGA CA",
1894
+ "GG TTTT",
1895
+ "TG TTTC",
1896
+ "TT GTTC",
1897
+ "CCA GTGA",
1898
+ "TTC TCCA",
1899
+ "TC TTGA",
1900
+ "TGGC GAACA",
1901
+ "TTTTC AAC",
1902
+ "GTG TGTA",
1903
+ "TAA TCA",
1904
+ "CAC TATC",
1905
+ "GTT TTA",
1906
+ "GTCGAGTT GCAGAC",
1907
+ "GTAA CC",
1908
+ "TT TGAC",
1909
+ "TCATTA TGCAAAA",
1910
+ "GTCA TCC",
1911
+ "TGA TC",
1912
+ "TACC TCCA",
1913
+ "TGGCGAACA GCCA",
1914
+ "GAA GC",
1915
+ "GAA GG",
1916
+ "GCTC GCC",
1917
+ "CATC GTT",
1918
+ "GGAA TA",
1919
+ "TT TGCA",
1920
+ "CATC TTCC",
1921
+ "TGC GA",
1922
+ "TTGA CA",
1923
+ "TACC CC",
1924
+ "TACA CA",
1925
+ "GTT GAGC",
1926
+ "TG TGAA",
1927
+ "TGG TTC",
1928
+ "TGC TGA",
1929
+ "TGCA TGC",
1930
+ "GCA TAC",
1931
+ "TAAC AA",
1932
+ "GAC GG",
1933
+ "TG TAAA",
1934
+ "TCCC GAA",
1935
+ "TGGCGAACAGCCA TACCC",
1936
+ "GCAC TTCTGA",
1937
+ "GTAC AA",
1938
+ "TAA GTC",
1939
+ "GTCA GTA",
1940
+ "TT TTAC",
1941
+ "TTC TA",
1942
+ "GCC GCC",
1943
+ "GC TATC",
1944
+ "TGAGCCATTACC TCACCAAC",
1945
+ "GGCC CC",
1946
+ "GA TAAA",
1947
+ "TA TACA",
1948
+ "TCGAC TAGTGA",
1949
+ "TAC TTTC",
1950
+ "GG TGAA",
1951
+ "TG TTCA",
1952
+ "CATCTTCC GCGCA",
1953
+ "_ TGCC",
1954
+ "GA TCTC",
1955
+ "GTGG AA",
1956
+ "TG TCCA",
1957
+ "GACA CC",
1958
+ "GGTC TGGGTTGTT",
1959
+ "TTA CCA",
1960
+ "GCC GGC",
1961
+ "GAA GGC",
1962
+ "TGC AAC",
1963
+ "GC TCCCC",
1964
+ "TGGC TGC",
1965
+ "GCCTCC GTTAC",
1966
+ "GC TCCC",
1967
+ "TCC GAA",
1968
+ "GTC AAA",
1969
+ "GGA GTTAGCC",
1970
+ "GAA CCC",
1971
+ "TTACC AA",
1972
+ "_ GCC",
1973
+ "GCTC GAC",
1974
+ "GA GTTC",
1975
+ "TGAC TGATCATCC",
1976
+ "GAC TTAA",
1977
+ "GG TACA",
1978
+ "GA TCAA",
1979
+ "GGTCC TCTCGTAC",
1980
+ "AAC TTC",
1981
+ "GA TGGC",
1982
+ "TTTCACCCC TA",
1983
+ "GTTTCC CAC",
1984
+ "GGAA CC",
1985
+ "TAA TCC",
1986
+ "TTTTC GCC",
1987
+ "TGGCGAACAGCCATACCC TTGGGACC",
1988
+ "TAGC GATTCC",
1989
+ "GTCC AA",
1990
+ "GCA GCC",
1991
+ "CCA GTA",
1992
+ "GTG TCA",
1993
+ "TTTCA CA",
1994
+ "GGA GGC",
1995
+ "TAA TC",
1996
+ "TA TATC",
1997
+ "TAA TTC",
1998
+ "GTGGAC TACCAGGGTATCTAATCCTGTT",
1999
+ "TG TTGG",
2000
+ "TGG TTTCA",
2001
+ "GTGG CA",
2002
+ "GA TTTT",
2003
+ "GTGC AA",
2004
+ "TGC TGC",
2005
+ "TGAA CC",
2006
+ "TTACGCAC TCTTTAAA",
2007
+ "GGACGTTA GCACCC",
2008
+ "TC TTCA",
2009
+ "GCTT TTC",
2010
+ "CCACTGCTGCCTCCCGTAGGAGTC TGGACCGTGTCTCAGTTCCAGTG",
2011
+ "GAA GAC",
2012
+ "GCATTC GCACTTCTGA",
2013
+ "GTA GC",
2014
+ "GG GAAA",
2015
+ "TCCAC AA",
2016
+ "GGCC AA",
2017
+ "TGG GGC",
2018
+ "GTG CCA",
2019
+ "TGAA CA",
2020
+ "GC TGGC",
2021
+ "_ GTA",
2022
+ "GGTCTGGGTTGTT TCCC",
2023
+ "TTAGATG TTTCA",
2024
+ "GCTT GCACCC",
2025
+ "TCC CCC",
2026
+ "GGAA TTTC",
2027
+ "TTATAC AAAA",
2028
+ "GTATTACCGCGGCTGCTGGCAC GGAGTTAGCC",
2029
+ "GCCA CA",
2030
+ "GA TTTC",
2031
+ "GGA CCA",
2032
+ "GTCA CA",
2033
+ "CCCA CC",
2034
+ "GG TCTC",
2035
+ "GGC TC",
2036
+ "TGTGTC GGTT",
2037
+ "TTTGG GACCTTAGC",
2038
+ "CAC GAC",
2039
+ "TC GGTT",
2040
+ "TG TGTT",
2041
+ "GGC TCATTATGCAAAA",
2042
+ "TGC AAA",
2043
+ "GTT TA",
2044
+ "GTAA TTCC",
2045
+ "TGG TTCAC",
2046
+ "GTACC TTTTATCC",
2047
+ "GC GTTC",
2048
+ "GA GAAC",
2049
+ "TAA GTA",
2050
+ "TACC CACCA",
2051
+ "TCA GCC",
2052
+ "GG AAACC",
2053
+ "TAC TTA",
2054
+ "TAA CCA",
2055
+ "CAC TTC",
2056
+ "TGC TAA",
2057
+ "_ GC",
2058
+ "TTC GTAC",
2059
+ "TC AAAC",
2060
+ "TGC TCGAC",
2061
+ "TGAA TGA",
2062
+ "TC TCAC",
2063
+ "GAC TAC",
2064
+ "TTA GATA",
2065
+ "GGGGTTC TTTTCGCC",
2066
+ "TACC CA",
2067
+ "TCGGTA TTCC",
2068
+ "GTC TA",
2069
+ "CCAC AA",
2070
+ "TCC GG",
2071
+ "TAGG GC",
2072
+ "TAA GA",
2073
+ "GTC TCGCA",
2074
+ "GCTA CAC",
2075
+ "TT TGGA",
2076
+ "TTA GTC",
2077
+ "TCCACC GCTTGTGC",
2078
+ "TT TACAA",
2079
+ "GGC AAA",
2080
+ "GTTA TA",
2081
+ "TT AAAC",
2082
+ "TG TCAC",
2083
+ "TA TATT",
2084
+ "GTATTTAGCC TTGGA",
2085
+ "GTC AAC",
2086
+ "GG GCCA",
2087
+ "TCA TTC",
2088
+ "TGGCTGCTTCTAAGCC AACCTCC",
2089
+ "GTC GA",
2090
+ "GCTGA CCCA",
2091
+ "TT TGGC",
2092
+ "GG GCGG",
2093
+ "TG TTGA",
2094
+ "TTCC CC",
2095
+ "TTTC GG",
2096
+ "GTT CCC",
2097
+ "TGA TGG",
2098
+ "TG TAAC",
2099
+ "TA TGTA",
2100
+ "GGA TCA",
2101
+ "GTT AAA",
2102
+ "GGAA CA",
2103
+ "TTA TCCA",
2104
+ "GGC GG",
2105
+ "TG TCTC",
2106
+ "_ GA",
2107
+ "TA GAAC",
2108
+ "GCTTTAC GCCCA",
2109
+ "TAC TGA",
2110
+ "GCAA CC",
2111
+ "TTATACAAAA GGTAC",
2112
+ "TGA GTC",
2113
+ "GAGC TGAC",
2114
+ "GTT GA",
2115
+ "TGG AAC",
2116
+ "TT GGAC",
2117
+ "TG AAAC",
2118
+ "TCTC AAACCA",
2119
+ "GTAGG AAACC",
2120
+ "TTA TGA",
2121
+ "TG TACA",
2122
+ "TTC TGC",
2123
+ "GTC AAAA",
2124
+ "AACA CC",
2125
+ "TTC TCACC",
2126
+ "TTA TAA",
2127
+ "GGA TA",
2128
+ "TTA TTCA",
2129
+ "TTACC CC",
2130
+ "GCA TCA",
2131
+ "TTC GTGCA",
2132
+ "GTT TTC",
2133
+ "TAAA TCA",
2134
+ "TGCTCC CCACGCTTTC",
2135
+ "GTGA CA",
2136
+ "TGCC CC",
2137
+ "GGCC CA",
2138
+ "GCC CAC",
2139
+ "TA TAGC",
2140
+ "GGC GGC",
2141
+ "AAC TTCA",
2142
+ "TA TGACC",
2143
+ "TCAC AA",
2144
+ "GTGC TCTACC",
2145
+ "GCA TGA",
2146
+ "GAA TAA",
2147
+ "TAC TGC",
2148
+ "GTC GG",
2149
+ "GC TTAC",
2150
+ "TA TCCA",
2151
+ "GCTCGCC GCTAC",
2152
+ "TCGACTAGTGA GCTATTACGCTTTCTTTAAA",
2153
+ "TG TCCC",
2154
+ "GACC CC",
2155
+ "TGA GTA",
2156
+ "GGGGTTCTTTTCGCC TTTCCCTCACGGTAC",
2157
+ "TT TGTAA",
2158
+ "GAA TCA",
2159
+ "GCAC AA",
2160
+ "GAA TTC",
2161
+ "GTTTGA TTGGCC",
2162
+ "TTCCAA GCC",
2163
+ "CCA GCTA",
2164
+ "TGTC TCCC",
2165
+ "GTTGAGC GATGG",
2166
+ "TCC AAC",
2167
+ "GAA CCCC",
2168
+ "TGAC GAGCA",
2169
+ "TGAA TGGCTGC",
2170
+ "GTTA CATCTTCCGCGCA",
2171
+ "TCTCA GACCA",
2172
+ "GGA TGGCTGCTTCTAAGCCAACCTCC",
2173
+ "TG TTAC",
2174
+ "GACTCGCTTTC GCTAC",
2175
+ "TGG GAC",
2176
+ "GG TACC",
2177
+ "GGA GC",
2178
+ "GGGCC CCC",
2179
+ "TAAAC AA",
2180
+ "GTC GCC",
2181
+ "GA GGAA",
2182
+ "GAC TTTC",
2183
+ "TTTT TGA",
2184
+ "AAA GTT",
2185
+ "AAAA CC",
2186
+ "TA GAAA",
2187
+ "GG TTAA",
2188
+ "GC GCAA"
2189
+ ]
2190
+ }
2191
+ }
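The vocabulary and merges above define a byte-pair-encoding tokenizer over nucleotide strings: tokens are runs of A/C/G/T (plus an underscore that appears to act as a boundary marker), and each merge line names an adjacent pair that gets fused, in priority order. Below is a minimal sketch of rank-based BPE merging, using only a handful of merges copied from the top of the list; the real tokenizer applies the full table and then maps the resulting tokens to the ids in the vocab, so this is an illustration of the mechanism, not the shipped implementation.

def bpe_encode(seq, merges):
    # lower rank = higher merge priority, matching the list order above
    ranks = {tuple(m.split()): i for i, m in enumerate(merges)}
    symbols = list(seq)
    while len(symbols) > 1:
        # collect every adjacent pair that has a known merge rank
        candidates = [(ranks[(a, b)], i)
                      for i, (a, b) in enumerate(zip(symbols, symbols[1:]))
                      if (a, b) in ranks]
        if not candidates:
            break
        # fuse the best-ranked (leftmost on ties) pair and repeat
        _, i = min(candidates)
        symbols[i:i + 2] = ["".join(symbols[i:i + 2])]
    return symbols

# first fifteen merges from the table above
merges = ["T T", "C C", "A A", "G G", "T C", "G C", "T A", "G A",
          "C A", "T G", "T CC", "T AA", "TC A", "T GG", "TT A"]
print(bpe_encode("TTAACCTTGG", merges))  # -> ['TT', 'AA', 'CC', 'TT', 'GG']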
tokenizer.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcccad15ab3d220976cbfaf12718e4236f2159e5d0915d95c91f8d866b6569c
3
+ size 6128
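As with the safetensors shards, tokenizer.model is committed as a Git LFS pointer (version/oid/size) rather than the 6 KB payload itself, so a checkout made without LFS contains only the three lines above. A minimal sketch that reads such a pointer file, assuming "tokenizer.model" is a path in a local checkout:

def parse_lfs_pointer(path):
    # each pointer line is "key value", e.g. "size 6128"
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

ptr = parse_lfs_pointer("tokenizer.model")
print(ptr["oid"], ptr["size"])  # sha256:0bcc... 6128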
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[BOS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[EOS]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[MASK]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "[BOS]",
53
+ "clean_up_tokenization_spaces": false,
54
+ "cls_token": null,
55
+ "do_lower_case": null,
56
+ "eos_token": "[EOS]",
57
+ "extra_special_tokens": {},
58
+ "mask_token": "[MASK]",
59
+ "model_max_length": 512,
60
+ "pad_token": "[PAD]",
61
+ "sep_token": "[SEP]",
62
+ "tokenizer_class": "PreTrainedTokenizerFast",
63
+ "unk_token": "[UNK]"
64
+ }
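A minimal usage sketch to close the loop, assuming the files above sit in a local directory "MGFM-tokenizer" (a placeholder; any checkout of this repo or its hub id works). tokenizer_config.json declares PreTrainedTokenizerFast, so AutoTokenizer resolves to the fast tokenizer backed by tokenizer.json, with the six special tokens and the 512-token limit defined above.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("MGFM-tokenizer")  # placeholder path
enc = tok("TTACCGCGGCTGCTGG")                          # a raw DNA read
print(enc["input_ids"])
print(tok.convert_ids_to_tokens(enc["input_ids"]))
print(tok.bos_token, tok.eos_token, tok.model_max_length)  # [BOS] [EOS] 512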