Ligeti Balázs
committed on
Commit
·
12bee07
1
Parent(s):
d514933
Tokenizer base
Browse files- config_utils.py +443 -0
- data/prokbert_vocabs/prokbert-base-dna1/vocab.txt +9 -0
- data/prokbert_vocabs/prokbert-base-dna2/vocab.txt +21 -0
- data/prokbert_vocabs/prokbert-base-dna3/vocab.txt +69 -0
- data/prokbert_vocabs/prokbert-base-dna4/vocab.txt +261 -0
- data/prokbert_vocabs/prokbert-base-dna5/vocab.txt +1029 -0
- data/prokbert_vocabs/prokbert-base-dna6/vocab.txt +4101 -0
- data/prokbert_vocabs/prokbert-base-dna7/vocab.txt +0 -0
- data/prokbert_vocabs/prokbert-base-dna8/vocab.txt +0 -0
- data/prokbert_vocabs/prokbert-base-dna9/vocab.txt +0 -0
- general_utils.py +304 -0
- prokbert_tokenizer.py +429 -0
- special_tokens_map.json +7 -0
- tokenizer_config.json +6 -0
- vocab.txt +1 -0
config_utils.py
ADDED
@@ -0,0 +1,443 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Config utils
|
2 |
+
import yaml
|
3 |
+
import pathlib
|
4 |
+
from os.path import join
|
5 |
+
import os
|
6 |
+
import numpy as np
|
7 |
+
import torch
|
8 |
+
from multiprocessing import cpu_count
|
9 |
+
|
10 |
+
class BaseConfig:
    """Base class for managing and validating configurations.

    This class never assigns ``self.parameters`` itself; it must be set (for
    example by a subclass) before any method is called. Every method reads it
    as a nested mapping ``parameters[parameter_class][parameter_name]`` whose
    entries provide the keys ``'type'``, ``'default'``, ``'description'`` and,
    optionally, ``'constraints'`` (with optional ``'options'``, ``'min'`` and
    ``'max'`` entries).
    """

    # Maps an integer size in bytes to the signed NumPy integer dtype of that size.
    numpy_dtype_mapping = {1: np.int8,
                           2: np.int16,
                           8: np.int64,
                           4: np.int32}

    def __init__(self):
        super().__init__()

    def cast_to_expected_type(self, parameter_class: str, parameter_name: str, value: any) -> any:
        """
        Cast the given value to the expected type.

        Scalars (integer, float, string, boolean) are converted; containers
        (list, tuple, set, dict) are only checked with ``isinstance`` and
        returned unchanged; values declared as ``"type"`` are passed through.

        :param parameter_class: The class/category of the parameter.
        :type parameter_class: str
        :param parameter_name: The name of the parameter.
        :type parameter_name: str
        :param value: The value to be casted.
        :type value: any
        :return: Value casted to the expected type.
        :rtype: any
        :raises ValueError: If casting fails or the declared type is unknown.
        """
        expected_type = self.parameters[parameter_class][parameter_name]['type']

        if expected_type in ("integer", "int"):
            try:
                return int(value)
            # int(None) / int([]) raise TypeError rather than ValueError; report
            # both uniformly as the documented ValueError.
            except (TypeError, ValueError) as err:
                raise ValueError(f"Failed to cast value '{value}' to integer for parameter '{parameter_name}' in class '{parameter_class}'.") from err

        if expected_type == "float":
            try:
                return float(value)
            except (TypeError, ValueError) as err:
                raise ValueError(f"Failed to cast value '{value}' to float for parameter '{parameter_name}' in class '{parameter_class}'.") from err

        if expected_type in ("string", "str"):
            return str(value)

        if expected_type in ("boolean", "bool"):
            if isinstance(value, bool):
                return value
            # Accept the textual spellings "true"/"false" in any letter case.
            lowered = str(value).lower()
            if lowered == "true":
                return True
            if lowered == "false":
                return False
            raise ValueError(f"Failed to cast value '{value}' to boolean for parameter '{parameter_name}' in class '{parameter_class}'.")

        if expected_type == "type":
            # For this type, we will simply return the value without casting.
            # It assumes the configuration provides valid Python types.
            return value

        # Container types are validated, never converted.
        container_types = {"list": list, "tuple": tuple, "set": set, "dict": dict}
        if expected_type in container_types:
            if isinstance(value, container_types[expected_type]):
                return value
            raise ValueError(f"Failed to validate value '{value}' as a {expected_type} for parameter '{parameter_name}' in class '{parameter_class}'.")

        raise ValueError(f"Unknown expected type '{expected_type}' for parameter '{parameter_name}' in class '{parameter_class}'.")

    def get_parameter(self, parameter_class: str, parameter_name: str) -> any:
        """
        Retrieve the default value of a specified parameter.

        :param parameter_class: The class/category of the parameter (e.g., 'segmentation').
        :type parameter_class: str
        :param parameter_name: The name of the parameter.
        :type parameter_name: str
        :return: Default value of the parameter, casted to the expected type.
        :rtype: any
        :raises ValueError: If the default cannot be cast to the declared type.
        """
        default_value = self.parameters[parameter_class][parameter_name]['default']
        return self.cast_to_expected_type(parameter_class, parameter_name, default_value)

    def validate_type(self, parameter_class: str, parameter_name: str, value: any) -> bool:
        """
        Validate the type of a given value against the expected type.

        Only integer, float and string parameters are checked; the same type
        aliases are recognised as in :meth:`cast_to_expected_type`
        (``"integer"``/``"int"`` and ``"string"``/``"str"``). An ``int`` is
        accepted where a float is expected. Every other declared type is
        accepted without inspection.

        :param parameter_class: The class/category of the parameter.
        :type parameter_class: str
        :param parameter_name: The name of the parameter.
        :type parameter_name: str
        :param value: The value to be validated.
        :type value: any
        :return: True if the value is of the expected type, otherwise False.
        :rtype: bool
        """
        expected_type = self.parameters[parameter_class][parameter_name]['type']

        if expected_type in ("integer", "int"):
            return isinstance(value, int)
        if expected_type == "float":
            # A whole number such as 1 is a perfectly valid float setting.
            return isinstance(value, (int, float))
        if expected_type in ("string", "str"):
            return isinstance(value, str)
        return True

    def validate_value(self, parameter_class: str, parameter_name: str, value: any) -> bool:
        """
        Validate the value of a parameter against its constraints.

        A missing ``'constraints'`` entry means there is nothing to check.

        :param parameter_class: The class/category of the parameter.
        :type parameter_class: str
        :param parameter_name: The name of the parameter.
        :type parameter_name: str
        :param value: The value to be validated.
        :type value: any
        :return: True if the value meets the constraints, otherwise False.
        :rtype: bool
        """
        constraints = self.parameters[parameter_class][parameter_name].get('constraints', {})

        if 'options' in constraints and value not in constraints['options']:
            return False
        if 'min' in constraints and value < constraints['min']:
            return False
        if 'max' in constraints and value > constraints['max']:
            return False
        return True

    def validate(self, parameter_class: str, parameter_name: str, value: any):
        """
        Validate both the type and value of a parameter.

        :param parameter_class: The class/category of the parameter.
        :type parameter_class: str
        :param parameter_name: The name of the parameter.
        :type parameter_name: str
        :param value: The value to be validated.
        :type value: any
        :raises TypeError: If the value is not of the expected type.
        :raises ValueError: If the value does not meet the parameter's constraints.
        """
        if not self.validate_type(parameter_class, parameter_name, value):
            raise TypeError(f"Invalid type for {parameter_name} for parameter class '{parameter_class}'. Expected {self.parameters[parameter_class][parameter_name]['type']}.")

        if not self.validate_value(parameter_class, parameter_name, value):
            raise ValueError(f"Invalid value for {parameter_name} for parameter class '{parameter_class}'. Constraints: {self.parameters[parameter_class][parameter_name].get('constraints', {})}.")

    def describe(self, parameter_class: str, parameter_name: str) -> str:
        """
        Retrieve the description of a parameter.

        :param parameter_class: The class/category of the parameter.
        :type parameter_class: str
        :param parameter_name: The name of the parameter.
        :type parameter_name: str
        :return: Description of the parameter.
        :rtype: str
        """
        return self.parameters[parameter_class][parameter_name]['description']
|
182 |
+
|
183 |
+
|
184 |
+
|
185 |
+
class SeqConfig(BaseConfig):
    """Class to manage and validate sequence processing configurations.

    On construction the sequence-processing YAML file is loaded into
    ``self.parameters`` and the default segmentation, tokenization and
    computational parameter sets are built and stored on the instance.
    """

    def __init__(self):
        super().__init__()
        self.default_seq_config_file = self._get_default_sequence_processing_config_file()
        with open(self.default_seq_config_file, 'r', encoding='utf-8') as file:
            self.parameters = yaml.safe_load(file)

        # Some postprocessing steps: the upper bound of 'shift' is derived from
        # the DEFAULT k-mer size.
        self.parameters['tokenization']['shift']['constraints']['max'] = self.parameters['tokenization']['kmer']['default']-1
        # TODO: if someone overrides the k-mer parameter later, this bound is not
        # recomputed; updating it should be triggered at that point.

        self.get_and_set_segmentation_parameters()
        self.get_and_set_tokenization_parameters()
        self.get_and_set_computational_parameters()

    def _get_default_sequence_processing_config_file(self) -> str:
        """
        Retrieve the default sequence processing configuration file.

        The ``SEQ_CONFIG_FILE`` environment variable takes precedence; when it
        is not set, ``configs/sequence_processing.yaml`` next to this module is
        used. As a side effect the module directory is stored in
        ``self.current_path``.

        :return: Path to the configuration file.
        :rtype: str
        """
        current_path = pathlib.Path(__file__).parent
        prokbert_seq_config_file = join(current_path, 'configs', 'sequence_processing.yaml')
        self.current_path = current_path

        try:
            # Attempt to read the environment variable
            prokbert_seq_config_file = os.environ['SEQ_CONFIG_FILE']
        except KeyError:
            # Handle the case when the environment variable is not found
            print("SEQ_CONFIG_FILE environment variable has not been set. Using default value: {0}".format(prokbert_seq_config_file))
        return prokbert_seq_config_file

    def get_and_set_segmentation_parameters(self, parameters: dict = None) -> dict:
        """
        Retrieve and validate the provided parameters for segmentation.

        Starts from the configured defaults, applies each validated override and
        stores the result in ``self.segmentation_params``.

        :param parameters: A dictionary of parameters to be validated.
            ``None`` (the default) means no overrides.
        :type parameters: dict
        :return: A dictionary of validated segmentation parameters.
        :rtype: dict
        :raises ValueError: If an invalid segmentation parameter is provided.
        :raises TypeError: If an override has the wrong type.
        """
        if parameters is None:
            parameters = {}
        segmentation_params = {k: self.get_parameter('segmentation', k) for k in self.parameters['segmentation']}

        for param, param_value in parameters.items():
            if param not in segmentation_params:
                raise ValueError(f"The provided {param} is an INVALID segmentation parameter! The valid parameters are: {list(segmentation_params.keys())}")
            self.validate('segmentation', param, param_value)
            segmentation_params[param] = param_value
        self.segmentation_params = segmentation_params

        return segmentation_params

    def get_and_set_tokenization_parameters(self, parameters: dict = None) -> dict:
        """
        Retrieve and validate the tokenization parameters and load the vocabulary.

        Starts from the configured defaults, applies each validated override,
        resolves the vocabulary file and reads it into ``'vocabmap'``. The
        result is stored in ``self.tokenization_params``.

        :param parameters: A dictionary of parameters to be validated.
            ``None`` (the default) means no overrides.
        :type parameters: dict
        :return: A dictionary of validated tokenization parameters, extended
            with the ``'vocabmap'`` entry.
        :rtype: dict
        :raises ValueError: If an invalid tokenization parameter is provided.
        :raises TypeError: If an override has the wrong type.
        """
        # NOTE(review): the 'shift' upper bound is not refreshed here when 'kmer'
        # is overridden, although the original comment suggested that was the
        # intention -- confirm the desired behaviour before enforcing it.
        if parameters is None:
            parameters = {}

        tokenization_params = {k: self.get_parameter('tokenization', k) for k in self.parameters['tokenization']}
        for param, param_value in parameters.items():
            if param not in tokenization_params:
                raise ValueError(f"The provided {param} is an INVALID tokenization parameter! The valid parameters are: {list(tokenization_params.keys())}")
            self.validate('tokenization', param, param_value)
            tokenization_params[param] = param_value

        # Loading and checking the vocab file. It is assumed to be an ordered dictionary.
        vocabfile = tokenization_params['vocabfile']
        act_kmer = tokenization_params['kmer']
        if vocabfile == 'auto':
            # 'auto' selects the vocabulary shipped next to this module for the active k-mer size.
            vocabfile_path = join(self.current_path, 'data/prokbert_vocabs/', f'prokbert-base-dna{act_kmer}', 'vocab.txt')
            tokenization_params['vocabfile'] = vocabfile_path
        else:
            vocabfile_path = vocabfile
        with open(vocabfile_path, encoding='utf-8') as vocabfile_in:
            # Token id = zero-based line number of the token in the file.
            vocabmap = {line.strip(): i for i, line in enumerate(vocabfile_in)}
        tokenization_params['vocabmap'] = vocabmap

        self.tokenization_params = tokenization_params
        return tokenization_params

    def get_and_set_computational_parameters(self, parameters: dict = None) -> dict:
        """
        Read and validate the computational parameters.

        Starts from the configured defaults, applies each validated override,
        replaces a core count of ``-1`` with the number of CPUs reported by
        ``multiprocessing.cpu_count`` and adds the derived ``'np_tokentype'``
        entry. The result is stored in ``self.computational_params``.

        :param parameters: A dictionary of parameters to be validated.
            ``None`` (the default) means no overrides.
        :type parameters: dict
        :return: A dictionary of validated computational parameters.
        :rtype: dict
        :raises ValueError: If an invalid computation parameter is provided.
        :raises TypeError: If an override has the wrong type.
        :raises KeyError: If ``numpy_token_integer_prec_byte`` is not a key of
            ``numpy_dtype_mapping``.
        """
        if parameters is None:
            parameters = {}

        computational_params = {k: self.get_parameter('computation', k) for k in self.parameters['computation']}

        for param, param_value in parameters.items():
            if param not in computational_params:
                raise ValueError(f"The provided {param} is an INVALID computation parameter! The valid parameters are: {list(computational_params.keys())}")
            self.validate('computation', param, param_value)
            computational_params[param] = param_value

        # Resolve the -1 ("use every core") sentinel only AFTER the overrides have
        # been applied, so that an explicitly passed -1 is expanded as well.
        core_count = cpu_count()
        for core_key in ('cpu_cores_for_segmentation', 'cpu_cores_for_tokenization'):
            if computational_params[core_key] == -1:
                computational_params[core_key] = core_count

        np_tokentype = SeqConfig.numpy_dtype_mapping[computational_params['numpy_token_integer_prec_byte']]
        computational_params['np_tokentype'] = np_tokentype
        self.computational_params = computational_params
        return computational_params

    def get_maximum_segment_length_from_token_count_from_params(self):
        """Calculate the maximum segment length from the configured token limit.

        Uses ``token_limit``, ``shift`` and ``kmer`` from ``self.tokenization_params``.
        """
        max_token_counts = self.tokenization_params['token_limit']
        shift = self.tokenization_params['shift']
        kmer = self.tokenization_params['kmer']
        return self.get_maximum_segment_length_from_token_count(max_token_counts, shift, kmer)

    def get_maximum_token_count_from_max_length_from_params(self):
        """Calculate the maximum token count from the configured maximum segment length.

        Uses ``max_segment_length``, ``shift`` and ``kmer`` from ``self.tokenization_params``.
        """
        max_segment_length = self.tokenization_params['max_segment_length']
        shift = self.tokenization_params['shift']
        kmer = self.tokenization_params['kmer']
        max_token_count = self.get_maximum_token_count_from_max_length(max_segment_length, shift, kmer)

        return max_token_count

    @staticmethod
    def get_maximum_segment_length_from_token_count(max_token_counts, shift, kmer):
        """Calculate how long a sequence can be covered by the given number of tokens.

        Computes ``(max_token_counts - 3) * shift + kmer``.
        """
        # NOTE(review): the constant 3 presumably accounts for special tokens -- TODO confirm.
        max_segment_length = (max_token_counts-3)*shift + kmer
        return max_segment_length

    @staticmethod
    def get_maximum_token_count_from_max_length(max_segment_length, shift, kmer):
        """Calculate how many tokens are needed to cover a segment of the given length.

        Computes ``ceil((max_segment_length - kmer) / shift + 3)`` and returns it as ``int``.
        """
        max_token_count = int(np.ceil((max_segment_length - kmer)/shift+3))
        return max_token_count
|
331 |
+
|
332 |
+
class ProkBERTConfig(BaseConfig):
    """Class to manage and validate pretraining configurations.

    On construction the pretraining YAML file is loaded into
    ``self.parameters``, the data collator / model / dataset / pretraining
    parameter sets are built from it, and a :class:`SeqConfig` instance is
    created to provide the segmentation, tokenization and computation
    parameter sets.
    """

    # Maps an integer size in bytes to a torch integer dtype.
    # NOTE(review): 1 byte maps to the unsigned torch.uint8, whereas
    # BaseConfig.numpy_dtype_mapping maps 1 to the signed np.int8 -- confirm
    # that the asymmetry is intended.
    torch_dtype_mapping = {1: torch.uint8,
                           2: torch.int16,
                           8: torch.int64,
                           4: torch.int32}

    def __init__(self):
        super().__init__()

        self.default_pretrain_config_file = self._get_default_pretrain_config_file()
        with open(self.default_pretrain_config_file, 'r', encoding='utf-8') as file:
            self.parameters = yaml.safe_load(file)

        # Load and validate each parameter set
        self.data_collator_params = self.get_set_parameters('data_collator')
        self.model_params = self.get_set_parameters('model')
        self.dataset_params = self.get_set_parameters('dataset')
        self.pretraining_params = self.get_set_parameters('pretraining')

        # Getting the sequence-processing parameters as well; the matching sections
        # of the pretraining file are handed to SeqConfig as overrides.
        self.def_seq_config = SeqConfig()
        self.segmentation_params = self.def_seq_config.get_and_set_segmentation_parameters(self.parameters['segmentation'])
        self.tokenization_params = self.def_seq_config.get_and_set_tokenization_parameters(self.parameters['tokenization'])
        self.computation_params = self.def_seq_config.get_and_set_computational_parameters(self.parameters['computation'])

        self.default_torchtype = ProkBERTConfig.torch_dtype_mapping[self.computation_params['numpy_token_integer_prec_byte']]

    def _get_default_pretrain_config_file(self) -> str:
        """
        Retrieve the default pretraining configuration file.

        The ``PRETRAIN_CONFIG_FILE`` environment variable takes precedence; when
        it is not set, ``configs/pretraining.yaml`` next to this module is used.

        :return: Path to the configuration file.
        :rtype: str
        """
        current_path = pathlib.Path(__file__).parent
        pretrain_config_file = join(current_path, 'configs', 'pretraining.yaml')

        try:
            # Attempt to read the environment variable
            pretrain_config_file = os.environ['PRETRAIN_CONFIG_FILE']
        except KeyError:
            # Handle the case when the environment variable is not found
            print(f"PRETRAIN_CONFIG_FILE environment variable has not been set. Using default value: {pretrain_config_file}")
        return pretrain_config_file

    def get_set_parameters(self, parameter_class: str, parameters: dict = None) -> dict:
        """
        Retrieve and validate the provided parameters for a given parameter class.

        The configured defaults are validated first, then each override is
        validated and applied on top of them.

        :param parameter_class: The class/category of the parameter (e.g., 'data_collator').
        :type parameter_class: str
        :param parameters: A dictionary of parameters to be validated.
            ``None`` (the default) means no overrides.
        :type parameters: dict
        :return: A dictionary of validated parameters.
        :rtype: dict
        :raises ValueError: If an invalid parameter is provided.
        :raises TypeError: If a value has the wrong type.
        """
        if parameters is None:
            parameters = {}
        class_params = {k: self.get_parameter(parameter_class, k) for k in self.parameters[parameter_class]}

        # First validating the class (default) parameters as well
        for param, param_value in class_params.items():
            self.validate(parameter_class, param, param_value)

        for param, param_value in parameters.items():
            if param not in class_params:
                raise ValueError(f"The provided {param} is an INVALID {parameter_class} parameter! The valid parameters are: {list(class_params.keys())}")
            self.validate(parameter_class, param, param_value)
            class_params[param] = param_value

        return class_params

    def get_and_set_model_parameters(self, parameters: dict = None) -> dict:
        """Set the model parameters from defaults plus ``parameters`` and return them."""
        self.model_params = self.get_set_parameters('model', parameters)

        return self.model_params

    def get_and_set_dataset_parameters(self, parameters: dict = None) -> dict:
        """Set the dataset parameters from defaults plus ``parameters`` and return them."""
        self.dataset_params = self.get_set_parameters('dataset', parameters)

        return self.dataset_params

    def get_and_set_pretraining_parameters(self, parameters: dict = None) -> dict:
        """Set the pretraining parameters from defaults plus ``parameters`` and return them."""
        self.pretraining_params = self.get_set_parameters('pretraining', parameters)

        return self.pretraining_params

    def get_and_set_datacollator_parameters(self, parameters: dict = None) -> dict:
        """Set the data collator parameters from defaults plus ``parameters`` and return them."""
        self.data_collator_params = self.get_set_parameters('data_collator', parameters)
        return self.data_collator_params

    def get_and_set_segmentation_parameters(self, parameters: dict = None) -> dict:
        """Set the segmentation parameters through the wrapped SeqConfig and return them."""
        # Always hand a real dict to SeqConfig so this works whatever its default is.
        overrides = {} if parameters is None else parameters
        self.segmentation_params = self.def_seq_config.get_and_set_segmentation_parameters(overrides)

        return self.segmentation_params

    def get_and_set_tokenization_parameters(self, parameters: dict = None) -> dict:
        """Set the tokenization parameters through the wrapped SeqConfig and return them."""
        overrides = {} if parameters is None else parameters
        self.tokenization_params = self.def_seq_config.get_and_set_tokenization_parameters(overrides)

        return self.tokenization_params

    def get_and_set_computation_params(self, parameters: dict = None) -> dict:
        """Set the computation parameters through the wrapped SeqConfig and return them.

        ``self.default_torchtype`` is refreshed as well, because it is derived
        from the ``numpy_token_integer_prec_byte`` entry that may just have
        changed.
        """
        overrides = {} if parameters is None else parameters
        self.computation_params = self.def_seq_config.get_and_set_computational_parameters(overrides)
        # Keep the derived torch dtype in sync with the (possibly new) byte precision.
        self.default_torchtype = ProkBERTConfig.torch_dtype_mapping[self.computation_params['numpy_token_integer_prec_byte']]
        return self.computation_params
|
data/prokbert_vocabs/prokbert-base-dna1/vocab.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[PAD]
|
2 |
+
[UNK]
|
3 |
+
[CLS]
|
4 |
+
[SEP]
|
5 |
+
[MASK]
|
6 |
+
A
|
7 |
+
C
|
8 |
+
G
|
9 |
+
T
|
data/prokbert_vocabs/prokbert-base-dna2/vocab.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[PAD]
|
2 |
+
[UNK]
|
3 |
+
[CLS]
|
4 |
+
[SEP]
|
5 |
+
[MASK]
|
6 |
+
AA
|
7 |
+
AC
|
8 |
+
AG
|
9 |
+
AT
|
10 |
+
CA
|
11 |
+
CC
|
12 |
+
CG
|
13 |
+
CT
|
14 |
+
GA
|
15 |
+
GC
|
16 |
+
GG
|
17 |
+
GT
|
18 |
+
TA
|
19 |
+
TC
|
20 |
+
TG
|
21 |
+
TT
|
data/prokbert_vocabs/prokbert-base-dna3/vocab.txt
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[PAD]
|
2 |
+
[UNK]
|
3 |
+
[CLS]
|
4 |
+
[SEP]
|
5 |
+
[MASK]
|
6 |
+
AAA
|
7 |
+
AAC
|
8 |
+
AAG
|
9 |
+
AAT
|
10 |
+
ACA
|
11 |
+
ACC
|
12 |
+
ACG
|
13 |
+
ACT
|
14 |
+
AGA
|
15 |
+
AGC
|
16 |
+
AGG
|
17 |
+
AGT
|
18 |
+
ATA
|
19 |
+
ATC
|
20 |
+
ATG
|
21 |
+
ATT
|
22 |
+
CAA
|
23 |
+
CAC
|
24 |
+
CAG
|
25 |
+
CAT
|
26 |
+
CCA
|
27 |
+
CCC
|
28 |
+
CCG
|
29 |
+
CCT
|
30 |
+
CGA
|
31 |
+
CGC
|
32 |
+
CGG
|
33 |
+
CGT
|
34 |
+
CTA
|
35 |
+
CTC
|
36 |
+
CTG
|
37 |
+
CTT
|
38 |
+
GAA
|
39 |
+
GAC
|
40 |
+
GAG
|
41 |
+
GAT
|
42 |
+
GCA
|
43 |
+
GCC
|
44 |
+
GCG
|
45 |
+
GCT
|
46 |
+
GGA
|
47 |
+
GGC
|
48 |
+
GGG
|
49 |
+
GGT
|
50 |
+
GTA
|
51 |
+
GTC
|
52 |
+
GTG
|
53 |
+
GTT
|
54 |
+
TAA
|
55 |
+
TAC
|
56 |
+
TAG
|
57 |
+
TAT
|
58 |
+
TCA
|
59 |
+
TCC
|
60 |
+
TCG
|
61 |
+
TCT
|
62 |
+
TGA
|
63 |
+
TGC
|
64 |
+
TGG
|
65 |
+
TGT
|
66 |
+
TTA
|
67 |
+
TTC
|
68 |
+
TTG
|
69 |
+
TTT
|
data/prokbert_vocabs/prokbert-base-dna4/vocab.txt
ADDED
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[PAD]
|
2 |
+
[UNK]
|
3 |
+
[CLS]
|
4 |
+
[SEP]
|
5 |
+
[MASK]
|
6 |
+
AAAA
|
7 |
+
AAAC
|
8 |
+
AAAG
|
9 |
+
AAAT
|
10 |
+
AACA
|
11 |
+
AACC
|
12 |
+
AACG
|
13 |
+
AACT
|
14 |
+
AAGA
|
15 |
+
AAGC
|
16 |
+
AAGG
|
17 |
+
AAGT
|
18 |
+
AATA
|
19 |
+
AATC
|
20 |
+
AATG
|
21 |
+
AATT
|
22 |
+
ACAA
|
23 |
+
ACAC
|
24 |
+
ACAG
|
25 |
+
ACAT
|
26 |
+
ACCA
|
27 |
+
ACCC
|
28 |
+
ACCG
|
29 |
+
ACCT
|
30 |
+
ACGA
|
31 |
+
ACGC
|
32 |
+
ACGG
|
33 |
+
ACGT
|
34 |
+
ACTA
|
35 |
+
ACTC
|
36 |
+
ACTG
|
37 |
+
ACTT
|
38 |
+
AGAA
|
39 |
+
AGAC
|
40 |
+
AGAG
|
41 |
+
AGAT
|
42 |
+
AGCA
|
43 |
+
AGCC
|
44 |
+
AGCG
|
45 |
+
AGCT
|
46 |
+
AGGA
|
47 |
+
AGGC
|
48 |
+
AGGG
|
49 |
+
AGGT
|
50 |
+
AGTA
|
51 |
+
AGTC
|
52 |
+
AGTG
|
53 |
+
AGTT
|
54 |
+
ATAA
|
55 |
+
ATAC
|
56 |
+
ATAG
|
57 |
+
ATAT
|
58 |
+
ATCA
|
59 |
+
ATCC
|
60 |
+
ATCG
|
61 |
+
ATCT
|
62 |
+
ATGA
|
63 |
+
ATGC
|
64 |
+
ATGG
|
65 |
+
ATGT
|
66 |
+
ATTA
|
67 |
+
ATTC
|
68 |
+
ATTG
|
69 |
+
ATTT
|
70 |
+
CAAA
|
71 |
+
CAAC
|
72 |
+
CAAG
|
73 |
+
CAAT
|
74 |
+
CACA
|
75 |
+
CACC
|
76 |
+
CACG
|
77 |
+
CACT
|
78 |
+
CAGA
|
79 |
+
CAGC
|
80 |
+
CAGG
|
81 |
+
CAGT
|
82 |
+
CATA
|
83 |
+
CATC
|
84 |
+
CATG
|
85 |
+
CATT
|
86 |
+
CCAA
|
87 |
+
CCAC
|
88 |
+
CCAG
|
89 |
+
CCAT
|
90 |
+
CCCA
|
91 |
+
CCCC
|
92 |
+
CCCG
|
93 |
+
CCCT
|
94 |
+
CCGA
|
95 |
+
CCGC
|
96 |
+
CCGG
|
97 |
+
CCGT
|
98 |
+
CCTA
|
99 |
+
CCTC
|
100 |
+
CCTG
|
101 |
+
CCTT
|
102 |
+
CGAA
|
103 |
+
CGAC
|
104 |
+
CGAG
|
105 |
+
CGAT
|
106 |
+
CGCA
|
107 |
+
CGCC
|
108 |
+
CGCG
|
109 |
+
CGCT
|
110 |
+
CGGA
|
111 |
+
CGGC
|
112 |
+
CGGG
|
113 |
+
CGGT
|
114 |
+
CGTA
|
115 |
+
CGTC
|
116 |
+
CGTG
|
117 |
+
CGTT
|
118 |
+
CTAA
|
119 |
+
CTAC
|
120 |
+
CTAG
|
121 |
+
CTAT
|
122 |
+
CTCA
|
123 |
+
CTCC
|
124 |
+
CTCG
|
125 |
+
CTCT
|
126 |
+
CTGA
|
127 |
+
CTGC
|
128 |
+
CTGG
|
129 |
+
CTGT
|
130 |
+
CTTA
|
131 |
+
CTTC
|
132 |
+
CTTG
|
133 |
+
CTTT
|
134 |
+
GAAA
|
135 |
+
GAAC
|
136 |
+
GAAG
|
137 |
+
GAAT
|
138 |
+
GACA
|
139 |
+
GACC
|
140 |
+
GACG
|
141 |
+
GACT
|
142 |
+
GAGA
|
143 |
+
GAGC
|
144 |
+
GAGG
|
145 |
+
GAGT
|
146 |
+
GATA
|
147 |
+
GATC
|
148 |
+
GATG
|
149 |
+
GATT
|
150 |
+
GCAA
|
151 |
+
GCAC
|
152 |
+
GCAG
|
153 |
+
GCAT
|
154 |
+
GCCA
|
155 |
+
GCCC
|
156 |
+
GCCG
|
157 |
+
GCCT
|
158 |
+
GCGA
|
159 |
+
GCGC
|
160 |
+
GCGG
|
161 |
+
GCGT
|
162 |
+
GCTA
|
163 |
+
GCTC
|
164 |
+
GCTG
|
165 |
+
GCTT
|
166 |
+
GGAA
|
167 |
+
GGAC
|
168 |
+
GGAG
|
169 |
+
GGAT
|
170 |
+
GGCA
|
171 |
+
GGCC
|
172 |
+
GGCG
|
173 |
+
GGCT
|
174 |
+
GGGA
|
175 |
+
GGGC
|
176 |
+
GGGG
|
177 |
+
GGGT
|
178 |
+
GGTA
|
179 |
+
GGTC
|
180 |
+
GGTG
|
181 |
+
GGTT
|
182 |
+
GTAA
|
183 |
+
GTAC
|
184 |
+
GTAG
|
185 |
+
GTAT
|
186 |
+
GTCA
|
187 |
+
GTCC
|
188 |
+
GTCG
|
189 |
+
GTCT
|
190 |
+
GTGA
|
191 |
+
GTGC
|
192 |
+
GTGG
|
193 |
+
GTGT
|
194 |
+
GTTA
|
195 |
+
GTTC
|
196 |
+
GTTG
|
197 |
+
GTTT
|
198 |
+
TAAA
|
199 |
+
TAAC
|
200 |
+
TAAG
|
201 |
+
TAAT
|
202 |
+
TACA
|
203 |
+
TACC
|
204 |
+
TACG
|
205 |
+
TACT
|
206 |
+
TAGA
|
207 |
+
TAGC
|
208 |
+
TAGG
|
209 |
+
TAGT
|
210 |
+
TATA
|
211 |
+
TATC
|
212 |
+
TATG
|
213 |
+
TATT
|
214 |
+
TCAA
|
215 |
+
TCAC
|
216 |
+
TCAG
|
217 |
+
TCAT
|
218 |
+
TCCA
|
219 |
+
TCCC
|
220 |
+
TCCG
|
221 |
+
TCCT
|
222 |
+
TCGA
|
223 |
+
TCGC
|
224 |
+
TCGG
|
225 |
+
TCGT
|
226 |
+
TCTA
|
227 |
+
TCTC
|
228 |
+
TCTG
|
229 |
+
TCTT
|
230 |
+
TGAA
|
231 |
+
TGAC
|
232 |
+
TGAG
|
233 |
+
TGAT
|
234 |
+
TGCA
|
235 |
+
TGCC
|
236 |
+
TGCG
|
237 |
+
TGCT
|
238 |
+
TGGA
|
239 |
+
TGGC
|
240 |
+
TGGG
|
241 |
+
TGGT
|
242 |
+
TGTA
|
243 |
+
TGTC
|
244 |
+
TGTG
|
245 |
+
TGTT
|
246 |
+
TTAA
|
247 |
+
TTAC
|
248 |
+
TTAG
|
249 |
+
TTAT
|
250 |
+
TTCA
|
251 |
+
TTCC
|
252 |
+
TTCG
|
253 |
+
TTCT
|
254 |
+
TTGA
|
255 |
+
TTGC
|
256 |
+
TTGG
|
257 |
+
TTGT
|
258 |
+
TTTA
|
259 |
+
TTTC
|
260 |
+
TTTG
|
261 |
+
TTTT
|
data/prokbert_vocabs/prokbert-base-dna5/vocab.txt
ADDED
@@ -0,0 +1,1029 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[PAD]
|
2 |
+
[UNK]
|
3 |
+
[CLS]
|
4 |
+
[SEP]
|
5 |
+
[MASK]
|
6 |
+
AAAAA
|
7 |
+
AAAAC
|
8 |
+
AAAAG
|
9 |
+
AAAAT
|
10 |
+
AAACA
|
11 |
+
AAACC
|
12 |
+
AAACG
|
13 |
+
AAACT
|
14 |
+
AAAGA
|
15 |
+
AAAGC
|
16 |
+
AAAGG
|
17 |
+
AAAGT
|
18 |
+
AAATA
|
19 |
+
AAATC
|
20 |
+
AAATG
|
21 |
+
AAATT
|
22 |
+
AACAA
|
23 |
+
AACAC
|
24 |
+
AACAG
|
25 |
+
AACAT
|
26 |
+
AACCA
|
27 |
+
AACCC
|
28 |
+
AACCG
|
29 |
+
AACCT
|
30 |
+
AACGA
|
31 |
+
AACGC
|
32 |
+
AACGG
|
33 |
+
AACGT
|
34 |
+
AACTA
|
35 |
+
AACTC
|
36 |
+
AACTG
|
37 |
+
AACTT
|
38 |
+
AAGAA
|
39 |
+
AAGAC
|
40 |
+
AAGAG
|
41 |
+
AAGAT
|
42 |
+
AAGCA
|
43 |
+
AAGCC
|
44 |
+
AAGCG
|
45 |
+
AAGCT
|
46 |
+
AAGGA
|
47 |
+
AAGGC
|
48 |
+
AAGGG
|
49 |
+
AAGGT
|
50 |
+
AAGTA
|
51 |
+
AAGTC
|
52 |
+
AAGTG
|
53 |
+
AAGTT
|
54 |
+
AATAA
|
55 |
+
AATAC
|
56 |
+
AATAG
|
57 |
+
AATAT
|
58 |
+
AATCA
|
59 |
+
AATCC
|
60 |
+
AATCG
|
61 |
+
AATCT
|
62 |
+
AATGA
|
63 |
+
AATGC
|
64 |
+
AATGG
|
65 |
+
AATGT
|
66 |
+
AATTA
|
67 |
+
AATTC
|
68 |
+
AATTG
|
69 |
+
AATTT
|
70 |
+
ACAAA
|
71 |
+
ACAAC
|
72 |
+
ACAAG
|
73 |
+
ACAAT
|
74 |
+
ACACA
|
75 |
+
ACACC
|
76 |
+
ACACG
|
77 |
+
ACACT
|
78 |
+
ACAGA
|
79 |
+
ACAGC
|
80 |
+
ACAGG
|
81 |
+
ACAGT
|
82 |
+
ACATA
|
83 |
+
ACATC
|
84 |
+
ACATG
|
85 |
+
ACATT
|
86 |
+
ACCAA
|
87 |
+
ACCAC
|
88 |
+
ACCAG
|
89 |
+
ACCAT
|
90 |
+
ACCCA
|
91 |
+
ACCCC
|
92 |
+
ACCCG
|
93 |
+
ACCCT
|
94 |
+
ACCGA
|
95 |
+
ACCGC
|
96 |
+
ACCGG
|
97 |
+
ACCGT
|
98 |
+
ACCTA
|
99 |
+
ACCTC
|
100 |
+
ACCTG
|
101 |
+
ACCTT
|
102 |
+
ACGAA
|
103 |
+
ACGAC
|
104 |
+
ACGAG
|
105 |
+
ACGAT
|
106 |
+
ACGCA
|
107 |
+
ACGCC
|
108 |
+
ACGCG
|
109 |
+
ACGCT
|
110 |
+
ACGGA
|
111 |
+
ACGGC
|
112 |
+
ACGGG
|
113 |
+
ACGGT
|
114 |
+
ACGTA
|
115 |
+
ACGTC
|
116 |
+
ACGTG
|
117 |
+
ACGTT
|
118 |
+
ACTAA
|
119 |
+
ACTAC
|
120 |
+
ACTAG
|
121 |
+
ACTAT
|
122 |
+
ACTCA
|
123 |
+
ACTCC
|
124 |
+
ACTCG
|
125 |
+
ACTCT
|
126 |
+
ACTGA
|
127 |
+
ACTGC
|
128 |
+
ACTGG
|
129 |
+
ACTGT
|
130 |
+
ACTTA
|
131 |
+
ACTTC
|
132 |
+
ACTTG
|
133 |
+
ACTTT
|
134 |
+
AGAAA
|
135 |
+
AGAAC
|
136 |
+
AGAAG
|
137 |
+
AGAAT
|
138 |
+
AGACA
|
139 |
+
AGACC
|
140 |
+
AGACG
|
141 |
+
AGACT
|
142 |
+
AGAGA
|
143 |
+
AGAGC
|
144 |
+
AGAGG
|
145 |
+
AGAGT
|
146 |
+
AGATA
|
147 |
+
AGATC
|
148 |
+
AGATG
|
149 |
+
AGATT
|
150 |
+
AGCAA
|
151 |
+
AGCAC
|
152 |
+
AGCAG
|
153 |
+
AGCAT
|
154 |
+
AGCCA
|
155 |
+
AGCCC
|
156 |
+
AGCCG
|
157 |
+
AGCCT
|
158 |
+
AGCGA
|
159 |
+
AGCGC
|
160 |
+
AGCGG
|
161 |
+
AGCGT
|
162 |
+
AGCTA
|
163 |
+
AGCTC
|
164 |
+
AGCTG
|
165 |
+
AGCTT
|
166 |
+
AGGAA
|
167 |
+
AGGAC
|
168 |
+
AGGAG
|
169 |
+
AGGAT
|
170 |
+
AGGCA
|
171 |
+
AGGCC
|
172 |
+
AGGCG
|
173 |
+
AGGCT
|
174 |
+
AGGGA
|
175 |
+
AGGGC
|
176 |
+
AGGGG
|
177 |
+
AGGGT
|
178 |
+
AGGTA
|
179 |
+
AGGTC
|
180 |
+
AGGTG
|
181 |
+
AGGTT
|
182 |
+
AGTAA
|
183 |
+
AGTAC
|
184 |
+
AGTAG
|
185 |
+
AGTAT
|
186 |
+
AGTCA
|
187 |
+
AGTCC
|
188 |
+
AGTCG
|
189 |
+
AGTCT
|
190 |
+
AGTGA
|
191 |
+
AGTGC
|
192 |
+
AGTGG
|
193 |
+
AGTGT
|
194 |
+
AGTTA
|
195 |
+
AGTTC
|
196 |
+
AGTTG
|
197 |
+
AGTTT
|
198 |
+
ATAAA
|
199 |
+
ATAAC
|
200 |
+
ATAAG
|
201 |
+
ATAAT
|
202 |
+
ATACA
|
203 |
+
ATACC
|
204 |
+
ATACG
|
205 |
+
ATACT
|
206 |
+
ATAGA
|
207 |
+
ATAGC
|
208 |
+
ATAGG
|
209 |
+
ATAGT
|
210 |
+
ATATA
|
211 |
+
ATATC
|
212 |
+
ATATG
|
213 |
+
ATATT
|
214 |
+
ATCAA
|
215 |
+
ATCAC
|
216 |
+
ATCAG
|
217 |
+
ATCAT
|
218 |
+
ATCCA
|
219 |
+
ATCCC
|
220 |
+
ATCCG
|
221 |
+
ATCCT
|
222 |
+
ATCGA
|
223 |
+
ATCGC
|
224 |
+
ATCGG
|
225 |
+
ATCGT
|
226 |
+
ATCTA
|
227 |
+
ATCTC
|
228 |
+
ATCTG
|
229 |
+
ATCTT
|
230 |
+
ATGAA
|
231 |
+
ATGAC
|
232 |
+
ATGAG
|
233 |
+
ATGAT
|
234 |
+
ATGCA
|
235 |
+
ATGCC
|
236 |
+
ATGCG
|
237 |
+
ATGCT
|
238 |
+
ATGGA
|
239 |
+
ATGGC
|
240 |
+
ATGGG
|
241 |
+
ATGGT
|
242 |
+
ATGTA
|
243 |
+
ATGTC
|
244 |
+
ATGTG
|
245 |
+
ATGTT
|
246 |
+
ATTAA
|
247 |
+
ATTAC
|
248 |
+
ATTAG
|
249 |
+
ATTAT
|
250 |
+
ATTCA
|
251 |
+
ATTCC
|
252 |
+
ATTCG
|
253 |
+
ATTCT
|
254 |
+
ATTGA
|
255 |
+
ATTGC
|
256 |
+
ATTGG
|
257 |
+
ATTGT
|
258 |
+
ATTTA
|
259 |
+
ATTTC
|
260 |
+
ATTTG
|
261 |
+
ATTTT
|
262 |
+
CAAAA
|
263 |
+
CAAAC
|
264 |
+
CAAAG
|
265 |
+
CAAAT
|
266 |
+
CAACA
|
267 |
+
CAACC
|
268 |
+
CAACG
|
269 |
+
CAACT
|
270 |
+
CAAGA
|
271 |
+
CAAGC
|
272 |
+
CAAGG
|
273 |
+
CAAGT
|
274 |
+
CAATA
|
275 |
+
CAATC
|
276 |
+
CAATG
|
277 |
+
CAATT
|
278 |
+
CACAA
|
279 |
+
CACAC
|
280 |
+
CACAG
|
281 |
+
CACAT
|
282 |
+
CACCA
|
283 |
+
CACCC
|
284 |
+
CACCG
|
285 |
+
CACCT
|
286 |
+
CACGA
|
287 |
+
CACGC
|
288 |
+
CACGG
|
289 |
+
CACGT
|
290 |
+
CACTA
|
291 |
+
CACTC
|
292 |
+
CACTG
|
293 |
+
CACTT
|
294 |
+
CAGAA
|
295 |
+
CAGAC
|
296 |
+
CAGAG
|
297 |
+
CAGAT
|
298 |
+
CAGCA
|
299 |
+
CAGCC
|
300 |
+
CAGCG
|
301 |
+
CAGCT
|
302 |
+
CAGGA
|
303 |
+
CAGGC
|
304 |
+
CAGGG
|
305 |
+
CAGGT
|
306 |
+
CAGTA
|
307 |
+
CAGTC
|
308 |
+
CAGTG
|
309 |
+
CAGTT
|
310 |
+
CATAA
|
311 |
+
CATAC
|
312 |
+
CATAG
|
313 |
+
CATAT
|
314 |
+
CATCA
|
315 |
+
CATCC
|
316 |
+
CATCG
|
317 |
+
CATCT
|
318 |
+
CATGA
|
319 |
+
CATGC
|
320 |
+
CATGG
|
321 |
+
CATGT
|
322 |
+
CATTA
|
323 |
+
CATTC
|
324 |
+
CATTG
|
325 |
+
CATTT
|
326 |
+
CCAAA
|
327 |
+
CCAAC
|
328 |
+
CCAAG
|
329 |
+
CCAAT
|
330 |
+
CCACA
|
331 |
+
CCACC
|
332 |
+
CCACG
|
333 |
+
CCACT
|
334 |
+
CCAGA
|
335 |
+
CCAGC
|
336 |
+
CCAGG
|
337 |
+
CCAGT
|
338 |
+
CCATA
|
339 |
+
CCATC
|
340 |
+
CCATG
|
341 |
+
CCATT
|
342 |
+
CCCAA
|
343 |
+
CCCAC
|
344 |
+
CCCAG
|
345 |
+
CCCAT
|
346 |
+
CCCCA
|
347 |
+
CCCCC
|
348 |
+
CCCCG
|
349 |
+
CCCCT
|
350 |
+
CCCGA
|
351 |
+
CCCGC
|
352 |
+
CCCGG
|
353 |
+
CCCGT
|
354 |
+
CCCTA
|
355 |
+
CCCTC
|
356 |
+
CCCTG
|
357 |
+
CCCTT
|
358 |
+
CCGAA
|
359 |
+
CCGAC
|
360 |
+
CCGAG
|
361 |
+
CCGAT
|
362 |
+
CCGCA
|
363 |
+
CCGCC
|
364 |
+
CCGCG
|
365 |
+
CCGCT
|
366 |
+
CCGGA
|
367 |
+
CCGGC
|
368 |
+
CCGGG
|
369 |
+
CCGGT
|
370 |
+
CCGTA
|
371 |
+
CCGTC
|
372 |
+
CCGTG
|
373 |
+
CCGTT
|
374 |
+
CCTAA
|
375 |
+
CCTAC
|
376 |
+
CCTAG
|
377 |
+
CCTAT
|
378 |
+
CCTCA
|
379 |
+
CCTCC
|
380 |
+
CCTCG
|
381 |
+
CCTCT
|
382 |
+
CCTGA
|
383 |
+
CCTGC
|
384 |
+
CCTGG
|
385 |
+
CCTGT
|
386 |
+
CCTTA
|
387 |
+
CCTTC
|
388 |
+
CCTTG
|
389 |
+
CCTTT
|
390 |
+
CGAAA
|
391 |
+
CGAAC
|
392 |
+
CGAAG
|
393 |
+
CGAAT
|
394 |
+
CGACA
|
395 |
+
CGACC
|
396 |
+
CGACG
|
397 |
+
CGACT
|
398 |
+
CGAGA
|
399 |
+
CGAGC
|
400 |
+
CGAGG
|
401 |
+
CGAGT
|
402 |
+
CGATA
|
403 |
+
CGATC
|
404 |
+
CGATG
|
405 |
+
CGATT
|
406 |
+
CGCAA
|
407 |
+
CGCAC
|
408 |
+
CGCAG
|
409 |
+
CGCAT
|
410 |
+
CGCCA
|
411 |
+
CGCCC
|
412 |
+
CGCCG
|
413 |
+
CGCCT
|
414 |
+
CGCGA
|
415 |
+
CGCGC
|
416 |
+
CGCGG
|
417 |
+
CGCGT
|
418 |
+
CGCTA
|
419 |
+
CGCTC
|
420 |
+
CGCTG
|
421 |
+
CGCTT
|
422 |
+
CGGAA
|
423 |
+
CGGAC
|
424 |
+
CGGAG
|
425 |
+
CGGAT
|
426 |
+
CGGCA
|
427 |
+
CGGCC
|
428 |
+
CGGCG
|
429 |
+
CGGCT
|
430 |
+
CGGGA
|
431 |
+
CGGGC
|
432 |
+
CGGGG
|
433 |
+
CGGGT
|
434 |
+
CGGTA
|
435 |
+
CGGTC
|
436 |
+
CGGTG
|
437 |
+
CGGTT
|
438 |
+
CGTAA
|
439 |
+
CGTAC
|
440 |
+
CGTAG
|
441 |
+
CGTAT
|
442 |
+
CGTCA
|
443 |
+
CGTCC
|
444 |
+
CGTCG
|
445 |
+
CGTCT
|
446 |
+
CGTGA
|
447 |
+
CGTGC
|
448 |
+
CGTGG
|
449 |
+
CGTGT
|
450 |
+
CGTTA
|
451 |
+
CGTTC
|
452 |
+
CGTTG
|
453 |
+
CGTTT
|
454 |
+
CTAAA
|
455 |
+
CTAAC
|
456 |
+
CTAAG
|
457 |
+
CTAAT
|
458 |
+
CTACA
|
459 |
+
CTACC
|
460 |
+
CTACG
|
461 |
+
CTACT
|
462 |
+
CTAGA
|
463 |
+
CTAGC
|
464 |
+
CTAGG
|
465 |
+
CTAGT
|
466 |
+
CTATA
|
467 |
+
CTATC
|
468 |
+
CTATG
|
469 |
+
CTATT
|
470 |
+
CTCAA
|
471 |
+
CTCAC
|
472 |
+
CTCAG
|
473 |
+
CTCAT
|
474 |
+
CTCCA
|
475 |
+
CTCCC
|
476 |
+
CTCCG
|
477 |
+
CTCCT
|
478 |
+
CTCGA
|
479 |
+
CTCGC
|
480 |
+
CTCGG
|
481 |
+
CTCGT
|
482 |
+
CTCTA
|
483 |
+
CTCTC
|
484 |
+
CTCTG
|
485 |
+
CTCTT
|
486 |
+
CTGAA
|
487 |
+
CTGAC
|
488 |
+
CTGAG
|
489 |
+
CTGAT
|
490 |
+
CTGCA
|
491 |
+
CTGCC
|
492 |
+
CTGCG
|
493 |
+
CTGCT
|
494 |
+
CTGGA
|
495 |
+
CTGGC
|
496 |
+
CTGGG
|
497 |
+
CTGGT
|
498 |
+
CTGTA
|
499 |
+
CTGTC
|
500 |
+
CTGTG
|
501 |
+
CTGTT
|
502 |
+
CTTAA
|
503 |
+
CTTAC
|
504 |
+
CTTAG
|
505 |
+
CTTAT
|
506 |
+
CTTCA
|
507 |
+
CTTCC
|
508 |
+
CTTCG
|
509 |
+
CTTCT
|
510 |
+
CTTGA
|
511 |
+
CTTGC
|
512 |
+
CTTGG
|
513 |
+
CTTGT
|
514 |
+
CTTTA
|
515 |
+
CTTTC
|
516 |
+
CTTTG
|
517 |
+
CTTTT
|
518 |
+
GAAAA
|
519 |
+
GAAAC
|
520 |
+
GAAAG
|
521 |
+
GAAAT
|
522 |
+
GAACA
|
523 |
+
GAACC
|
524 |
+
GAACG
|
525 |
+
GAACT
|
526 |
+
GAAGA
|
527 |
+
GAAGC
|
528 |
+
GAAGG
|
529 |
+
GAAGT
|
530 |
+
GAATA
|
531 |
+
GAATC
|
532 |
+
GAATG
|
533 |
+
GAATT
|
534 |
+
GACAA
|
535 |
+
GACAC
|
536 |
+
GACAG
|
537 |
+
GACAT
|
538 |
+
GACCA
|
539 |
+
GACCC
|
540 |
+
GACCG
|
541 |
+
GACCT
|
542 |
+
GACGA
|
543 |
+
GACGC
|
544 |
+
GACGG
|
545 |
+
GACGT
|
546 |
+
GACTA
|
547 |
+
GACTC
|
548 |
+
GACTG
|
549 |
+
GACTT
|
550 |
+
GAGAA
|
551 |
+
GAGAC
|
552 |
+
GAGAG
|
553 |
+
GAGAT
|
554 |
+
GAGCA
|
555 |
+
GAGCC
|
556 |
+
GAGCG
|
557 |
+
GAGCT
|
558 |
+
GAGGA
|
559 |
+
GAGGC
|
560 |
+
GAGGG
|
561 |
+
GAGGT
|
562 |
+
GAGTA
|
563 |
+
GAGTC
|
564 |
+
GAGTG
|
565 |
+
GAGTT
|
566 |
+
GATAA
|
567 |
+
GATAC
|
568 |
+
GATAG
|
569 |
+
GATAT
|
570 |
+
GATCA
|
571 |
+
GATCC
|
572 |
+
GATCG
|
573 |
+
GATCT
|
574 |
+
GATGA
|
575 |
+
GATGC
|
576 |
+
GATGG
|
577 |
+
GATGT
|
578 |
+
GATTA
|
579 |
+
GATTC
|
580 |
+
GATTG
|
581 |
+
GATTT
|
582 |
+
GCAAA
|
583 |
+
GCAAC
|
584 |
+
GCAAG
|
585 |
+
GCAAT
|
586 |
+
GCACA
|
587 |
+
GCACC
|
588 |
+
GCACG
|
589 |
+
GCACT
|
590 |
+
GCAGA
|
591 |
+
GCAGC
|
592 |
+
GCAGG
|
593 |
+
GCAGT
|
594 |
+
GCATA
|
595 |
+
GCATC
|
596 |
+
GCATG
|
597 |
+
GCATT
|
598 |
+
GCCAA
|
599 |
+
GCCAC
|
600 |
+
GCCAG
|
601 |
+
GCCAT
|
602 |
+
GCCCA
|
603 |
+
GCCCC
|
604 |
+
GCCCG
|
605 |
+
GCCCT
|
606 |
+
GCCGA
|
607 |
+
GCCGC
|
608 |
+
GCCGG
|
609 |
+
GCCGT
|
610 |
+
GCCTA
|
611 |
+
GCCTC
|
612 |
+
GCCTG
|
613 |
+
GCCTT
|
614 |
+
GCGAA
|
615 |
+
GCGAC
|
616 |
+
GCGAG
|
617 |
+
GCGAT
|
618 |
+
GCGCA
|
619 |
+
GCGCC
|
620 |
+
GCGCG
|
621 |
+
GCGCT
|
622 |
+
GCGGA
|
623 |
+
GCGGC
|
624 |
+
GCGGG
|
625 |
+
GCGGT
|
626 |
+
GCGTA
|
627 |
+
GCGTC
|
628 |
+
GCGTG
|
629 |
+
GCGTT
|
630 |
+
GCTAA
|
631 |
+
GCTAC
|
632 |
+
GCTAG
|
633 |
+
GCTAT
|
634 |
+
GCTCA
|
635 |
+
GCTCC
|
636 |
+
GCTCG
|
637 |
+
GCTCT
|
638 |
+
GCTGA
|
639 |
+
GCTGC
|
640 |
+
GCTGG
|
641 |
+
GCTGT
|
642 |
+
GCTTA
|
643 |
+
GCTTC
|
644 |
+
GCTTG
|
645 |
+
GCTTT
|
646 |
+
GGAAA
|
647 |
+
GGAAC
|
648 |
+
GGAAG
|
649 |
+
GGAAT
|
650 |
+
GGACA
|
651 |
+
GGACC
|
652 |
+
GGACG
|
653 |
+
GGACT
|
654 |
+
GGAGA
|
655 |
+
GGAGC
|
656 |
+
GGAGG
|
657 |
+
GGAGT
|
658 |
+
GGATA
|
659 |
+
GGATC
|
660 |
+
GGATG
|
661 |
+
GGATT
|
662 |
+
GGCAA
|
663 |
+
GGCAC
|
664 |
+
GGCAG
|
665 |
+
GGCAT
|
666 |
+
GGCCA
|
667 |
+
GGCCC
|
668 |
+
GGCCG
|
669 |
+
GGCCT
|
670 |
+
GGCGA
|
671 |
+
GGCGC
|
672 |
+
GGCGG
|
673 |
+
GGCGT
|
674 |
+
GGCTA
|
675 |
+
GGCTC
|
676 |
+
GGCTG
|
677 |
+
GGCTT
|
678 |
+
GGGAA
|
679 |
+
GGGAC
|
680 |
+
GGGAG
|
681 |
+
GGGAT
|
682 |
+
GGGCA
|
683 |
+
GGGCC
|
684 |
+
GGGCG
|
685 |
+
GGGCT
|
686 |
+
GGGGA
|
687 |
+
GGGGC
|
688 |
+
GGGGG
|
689 |
+
GGGGT
|
690 |
+
GGGTA
|
691 |
+
GGGTC
|
692 |
+
GGGTG
|
693 |
+
GGGTT
|
694 |
+
GGTAA
|
695 |
+
GGTAC
|
696 |
+
GGTAG
|
697 |
+
GGTAT
|
698 |
+
GGTCA
|
699 |
+
GGTCC
|
700 |
+
GGTCG
|
701 |
+
GGTCT
|
702 |
+
GGTGA
|
703 |
+
GGTGC
|
704 |
+
GGTGG
|
705 |
+
GGTGT
|
706 |
+
GGTTA
|
707 |
+
GGTTC
|
708 |
+
GGTTG
|
709 |
+
GGTTT
|
710 |
+
GTAAA
|
711 |
+
GTAAC
|
712 |
+
GTAAG
|
713 |
+
GTAAT
|
714 |
+
GTACA
|
715 |
+
GTACC
|
716 |
+
GTACG
|
717 |
+
GTACT
|
718 |
+
GTAGA
|
719 |
+
GTAGC
|
720 |
+
GTAGG
|
721 |
+
GTAGT
|
722 |
+
GTATA
|
723 |
+
GTATC
|
724 |
+
GTATG
|
725 |
+
GTATT
|
726 |
+
GTCAA
|
727 |
+
GTCAC
|
728 |
+
GTCAG
|
729 |
+
GTCAT
|
730 |
+
GTCCA
|
731 |
+
GTCCC
|
732 |
+
GTCCG
|
733 |
+
GTCCT
|
734 |
+
GTCGA
|
735 |
+
GTCGC
|
736 |
+
GTCGG
|
737 |
+
GTCGT
|
738 |
+
GTCTA
|
739 |
+
GTCTC
|
740 |
+
GTCTG
|
741 |
+
GTCTT
|
742 |
+
GTGAA
|
743 |
+
GTGAC
|
744 |
+
GTGAG
|
745 |
+
GTGAT
|
746 |
+
GTGCA
|
747 |
+
GTGCC
|
748 |
+
GTGCG
|
749 |
+
GTGCT
|
750 |
+
GTGGA
|
751 |
+
GTGGC
|
752 |
+
GTGGG
|
753 |
+
GTGGT
|
754 |
+
GTGTA
|
755 |
+
GTGTC
|
756 |
+
GTGTG
|
757 |
+
GTGTT
|
758 |
+
GTTAA
|
759 |
+
GTTAC
|
760 |
+
GTTAG
|
761 |
+
GTTAT
|
762 |
+
GTTCA
|
763 |
+
GTTCC
|
764 |
+
GTTCG
|
765 |
+
GTTCT
|
766 |
+
GTTGA
|
767 |
+
GTTGC
|
768 |
+
GTTGG
|
769 |
+
GTTGT
|
770 |
+
GTTTA
|
771 |
+
GTTTC
|
772 |
+
GTTTG
|
773 |
+
GTTTT
|
774 |
+
TAAAA
|
775 |
+
TAAAC
|
776 |
+
TAAAG
|
777 |
+
TAAAT
|
778 |
+
TAACA
|
779 |
+
TAACC
|
780 |
+
TAACG
|
781 |
+
TAACT
|
782 |
+
TAAGA
|
783 |
+
TAAGC
|
784 |
+
TAAGG
|
785 |
+
TAAGT
|
786 |
+
TAATA
|
787 |
+
TAATC
|
788 |
+
TAATG
|
789 |
+
TAATT
|
790 |
+
TACAA
|
791 |
+
TACAC
|
792 |
+
TACAG
|
793 |
+
TACAT
|
794 |
+
TACCA
|
795 |
+
TACCC
|
796 |
+
TACCG
|
797 |
+
TACCT
|
798 |
+
TACGA
|
799 |
+
TACGC
|
800 |
+
TACGG
|
801 |
+
TACGT
|
802 |
+
TACTA
|
803 |
+
TACTC
|
804 |
+
TACTG
|
805 |
+
TACTT
|
806 |
+
TAGAA
|
807 |
+
TAGAC
|
808 |
+
TAGAG
|
809 |
+
TAGAT
|
810 |
+
TAGCA
|
811 |
+
TAGCC
|
812 |
+
TAGCG
|
813 |
+
TAGCT
|
814 |
+
TAGGA
|
815 |
+
TAGGC
|
816 |
+
TAGGG
|
817 |
+
TAGGT
|
818 |
+
TAGTA
|
819 |
+
TAGTC
|
820 |
+
TAGTG
|
821 |
+
TAGTT
|
822 |
+
TATAA
|
823 |
+
TATAC
|
824 |
+
TATAG
|
825 |
+
TATAT
|
826 |
+
TATCA
|
827 |
+
TATCC
|
828 |
+
TATCG
|
829 |
+
TATCT
|
830 |
+
TATGA
|
831 |
+
TATGC
|
832 |
+
TATGG
|
833 |
+
TATGT
|
834 |
+
TATTA
|
835 |
+
TATTC
|
836 |
+
TATTG
|
837 |
+
TATTT
|
838 |
+
TCAAA
|
839 |
+
TCAAC
|
840 |
+
TCAAG
|
841 |
+
TCAAT
|
842 |
+
TCACA
|
843 |
+
TCACC
|
844 |
+
TCACG
|
845 |
+
TCACT
|
846 |
+
TCAGA
|
847 |
+
TCAGC
|
848 |
+
TCAGG
|
849 |
+
TCAGT
|
850 |
+
TCATA
|
851 |
+
TCATC
|
852 |
+
TCATG
|
853 |
+
TCATT
|
854 |
+
TCCAA
|
855 |
+
TCCAC
|
856 |
+
TCCAG
|
857 |
+
TCCAT
|
858 |
+
TCCCA
|
859 |
+
TCCCC
|
860 |
+
TCCCG
|
861 |
+
TCCCT
|
862 |
+
TCCGA
|
863 |
+
TCCGC
|
864 |
+
TCCGG
|
865 |
+
TCCGT
|
866 |
+
TCCTA
|
867 |
+
TCCTC
|
868 |
+
TCCTG
|
869 |
+
TCCTT
|
870 |
+
TCGAA
|
871 |
+
TCGAC
|
872 |
+
TCGAG
|
873 |
+
TCGAT
|
874 |
+
TCGCA
|
875 |
+
TCGCC
|
876 |
+
TCGCG
|
877 |
+
TCGCT
|
878 |
+
TCGGA
|
879 |
+
TCGGC
|
880 |
+
TCGGG
|
881 |
+
TCGGT
|
882 |
+
TCGTA
|
883 |
+
TCGTC
|
884 |
+
TCGTG
|
885 |
+
TCGTT
|
886 |
+
TCTAA
|
887 |
+
TCTAC
|
888 |
+
TCTAG
|
889 |
+
TCTAT
|
890 |
+
TCTCA
|
891 |
+
TCTCC
|
892 |
+
TCTCG
|
893 |
+
TCTCT
|
894 |
+
TCTGA
|
895 |
+
TCTGC
|
896 |
+
TCTGG
|
897 |
+
TCTGT
|
898 |
+
TCTTA
|
899 |
+
TCTTC
|
900 |
+
TCTTG
|
901 |
+
TCTTT
|
902 |
+
TGAAA
|
903 |
+
TGAAC
|
904 |
+
TGAAG
|
905 |
+
TGAAT
|
906 |
+
TGACA
|
907 |
+
TGACC
|
908 |
+
TGACG
|
909 |
+
TGACT
|
910 |
+
TGAGA
|
911 |
+
TGAGC
|
912 |
+
TGAGG
|
913 |
+
TGAGT
|
914 |
+
TGATA
|
915 |
+
TGATC
|
916 |
+
TGATG
|
917 |
+
TGATT
|
918 |
+
TGCAA
|
919 |
+
TGCAC
|
920 |
+
TGCAG
|
921 |
+
TGCAT
|
922 |
+
TGCCA
|
923 |
+
TGCCC
|
924 |
+
TGCCG
|
925 |
+
TGCCT
|
926 |
+
TGCGA
|
927 |
+
TGCGC
|
928 |
+
TGCGG
|
929 |
+
TGCGT
|
930 |
+
TGCTA
|
931 |
+
TGCTC
|
932 |
+
TGCTG
|
933 |
+
TGCTT
|
934 |
+
TGGAA
|
935 |
+
TGGAC
|
936 |
+
TGGAG
|
937 |
+
TGGAT
|
938 |
+
TGGCA
|
939 |
+
TGGCC
|
940 |
+
TGGCG
|
941 |
+
TGGCT
|
942 |
+
TGGGA
|
943 |
+
TGGGC
|
944 |
+
TGGGG
|
945 |
+
TGGGT
|
946 |
+
TGGTA
|
947 |
+
TGGTC
|
948 |
+
TGGTG
|
949 |
+
TGGTT
|
950 |
+
TGTAA
|
951 |
+
TGTAC
|
952 |
+
TGTAG
|
953 |
+
TGTAT
|
954 |
+
TGTCA
|
955 |
+
TGTCC
|
956 |
+
TGTCG
|
957 |
+
TGTCT
|
958 |
+
TGTGA
|
959 |
+
TGTGC
|
960 |
+
TGTGG
|
961 |
+
TGTGT
|
962 |
+
TGTTA
|
963 |
+
TGTTC
|
964 |
+
TGTTG
|
965 |
+
TGTTT
|
966 |
+
TTAAA
|
967 |
+
TTAAC
|
968 |
+
TTAAG
|
969 |
+
TTAAT
|
970 |
+
TTACA
|
971 |
+
TTACC
|
972 |
+
TTACG
|
973 |
+
TTACT
|
974 |
+
TTAGA
|
975 |
+
TTAGC
|
976 |
+
TTAGG
|
977 |
+
TTAGT
|
978 |
+
TTATA
|
979 |
+
TTATC
|
980 |
+
TTATG
|
981 |
+
TTATT
|
982 |
+
TTCAA
|
983 |
+
TTCAC
|
984 |
+
TTCAG
|
985 |
+
TTCAT
|
986 |
+
TTCCA
|
987 |
+
TTCCC
|
988 |
+
TTCCG
|
989 |
+
TTCCT
|
990 |
+
TTCGA
|
991 |
+
TTCGC
|
992 |
+
TTCGG
|
993 |
+
TTCGT
|
994 |
+
TTCTA
|
995 |
+
TTCTC
|
996 |
+
TTCTG
|
997 |
+
TTCTT
|
998 |
+
TTGAA
|
999 |
+
TTGAC
|
1000 |
+
TTGAG
|
1001 |
+
TTGAT
|
1002 |
+
TTGCA
|
1003 |
+
TTGCC
|
1004 |
+
TTGCG
|
1005 |
+
TTGCT
|
1006 |
+
TTGGA
|
1007 |
+
TTGGC
|
1008 |
+
TTGGG
|
1009 |
+
TTGGT
|
1010 |
+
TTGTA
|
1011 |
+
TTGTC
|
1012 |
+
TTGTG
|
1013 |
+
TTGTT
|
1014 |
+
TTTAA
|
1015 |
+
TTTAC
|
1016 |
+
TTTAG
|
1017 |
+
TTTAT
|
1018 |
+
TTTCA
|
1019 |
+
TTTCC
|
1020 |
+
TTTCG
|
1021 |
+
TTTCT
|
1022 |
+
TTTGA
|
1023 |
+
TTTGC
|
1024 |
+
TTTGG
|
1025 |
+
TTTGT
|
1026 |
+
TTTTA
|
1027 |
+
TTTTC
|
1028 |
+
TTTTG
|
1029 |
+
TTTTT
|
data/prokbert_vocabs/prokbert-base-dna6/vocab.txt
ADDED
@@ -0,0 +1,4101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[PAD]
|
2 |
+
[UNK]
|
3 |
+
[CLS]
|
4 |
+
[SEP]
|
5 |
+
[MASK]
|
6 |
+
AAAAAA
|
7 |
+
AAAAAC
|
8 |
+
AAAAAG
|
9 |
+
AAAAAT
|
10 |
+
AAAACA
|
11 |
+
AAAACC
|
12 |
+
AAAACG
|
13 |
+
AAAACT
|
14 |
+
AAAAGA
|
15 |
+
AAAAGC
|
16 |
+
AAAAGG
|
17 |
+
AAAAGT
|
18 |
+
AAAATA
|
19 |
+
AAAATC
|
20 |
+
AAAATG
|
21 |
+
AAAATT
|
22 |
+
AAACAA
|
23 |
+
AAACAC
|
24 |
+
AAACAG
|
25 |
+
AAACAT
|
26 |
+
AAACCA
|
27 |
+
AAACCC
|
28 |
+
AAACCG
|
29 |
+
AAACCT
|
30 |
+
AAACGA
|
31 |
+
AAACGC
|
32 |
+
AAACGG
|
33 |
+
AAACGT
|
34 |
+
AAACTA
|
35 |
+
AAACTC
|
36 |
+
AAACTG
|
37 |
+
AAACTT
|
38 |
+
AAAGAA
|
39 |
+
AAAGAC
|
40 |
+
AAAGAG
|
41 |
+
AAAGAT
|
42 |
+
AAAGCA
|
43 |
+
AAAGCC
|
44 |
+
AAAGCG
|
45 |
+
AAAGCT
|
46 |
+
AAAGGA
|
47 |
+
AAAGGC
|
48 |
+
AAAGGG
|
49 |
+
AAAGGT
|
50 |
+
AAAGTA
|
51 |
+
AAAGTC
|
52 |
+
AAAGTG
|
53 |
+
AAAGTT
|
54 |
+
AAATAA
|
55 |
+
AAATAC
|
56 |
+
AAATAG
|
57 |
+
AAATAT
|
58 |
+
AAATCA
|
59 |
+
AAATCC
|
60 |
+
AAATCG
|
61 |
+
AAATCT
|
62 |
+
AAATGA
|
63 |
+
AAATGC
|
64 |
+
AAATGG
|
65 |
+
AAATGT
|
66 |
+
AAATTA
|
67 |
+
AAATTC
|
68 |
+
AAATTG
|
69 |
+
AAATTT
|
70 |
+
AACAAA
|
71 |
+
AACAAC
|
72 |
+
AACAAG
|
73 |
+
AACAAT
|
74 |
+
AACACA
|
75 |
+
AACACC
|
76 |
+
AACACG
|
77 |
+
AACACT
|
78 |
+
AACAGA
|
79 |
+
AACAGC
|
80 |
+
AACAGG
|
81 |
+
AACAGT
|
82 |
+
AACATA
|
83 |
+
AACATC
|
84 |
+
AACATG
|
85 |
+
AACATT
|
86 |
+
AACCAA
|
87 |
+
AACCAC
|
88 |
+
AACCAG
|
89 |
+
AACCAT
|
90 |
+
AACCCA
|
91 |
+
AACCCC
|
92 |
+
AACCCG
|
93 |
+
AACCCT
|
94 |
+
AACCGA
|
95 |
+
AACCGC
|
96 |
+
AACCGG
|
97 |
+
AACCGT
|
98 |
+
AACCTA
|
99 |
+
AACCTC
|
100 |
+
AACCTG
|
101 |
+
AACCTT
|
102 |
+
AACGAA
|
103 |
+
AACGAC
|
104 |
+
AACGAG
|
105 |
+
AACGAT
|
106 |
+
AACGCA
|
107 |
+
AACGCC
|
108 |
+
AACGCG
|
109 |
+
AACGCT
|
110 |
+
AACGGA
|
111 |
+
AACGGC
|
112 |
+
AACGGG
|
113 |
+
AACGGT
|
114 |
+
AACGTA
|
115 |
+
AACGTC
|
116 |
+
AACGTG
|
117 |
+
AACGTT
|
118 |
+
AACTAA
|
119 |
+
AACTAC
|
120 |
+
AACTAG
|
121 |
+
AACTAT
|
122 |
+
AACTCA
|
123 |
+
AACTCC
|
124 |
+
AACTCG
|
125 |
+
AACTCT
|
126 |
+
AACTGA
|
127 |
+
AACTGC
|
128 |
+
AACTGG
|
129 |
+
AACTGT
|
130 |
+
AACTTA
|
131 |
+
AACTTC
|
132 |
+
AACTTG
|
133 |
+
AACTTT
|
134 |
+
AAGAAA
|
135 |
+
AAGAAC
|
136 |
+
AAGAAG
|
137 |
+
AAGAAT
|
138 |
+
AAGACA
|
139 |
+
AAGACC
|
140 |
+
AAGACG
|
141 |
+
AAGACT
|
142 |
+
AAGAGA
|
143 |
+
AAGAGC
|
144 |
+
AAGAGG
|
145 |
+
AAGAGT
|
146 |
+
AAGATA
|
147 |
+
AAGATC
|
148 |
+
AAGATG
|
149 |
+
AAGATT
|
150 |
+
AAGCAA
|
151 |
+
AAGCAC
|
152 |
+
AAGCAG
|
153 |
+
AAGCAT
|
154 |
+
AAGCCA
|
155 |
+
AAGCCC
|
156 |
+
AAGCCG
|
157 |
+
AAGCCT
|
158 |
+
AAGCGA
|
159 |
+
AAGCGC
|
160 |
+
AAGCGG
|
161 |
+
AAGCGT
|
162 |
+
AAGCTA
|
163 |
+
AAGCTC
|
164 |
+
AAGCTG
|
165 |
+
AAGCTT
|
166 |
+
AAGGAA
|
167 |
+
AAGGAC
|
168 |
+
AAGGAG
|
169 |
+
AAGGAT
|
170 |
+
AAGGCA
|
171 |
+
AAGGCC
|
172 |
+
AAGGCG
|
173 |
+
AAGGCT
|
174 |
+
AAGGGA
|
175 |
+
AAGGGC
|
176 |
+
AAGGGG
|
177 |
+
AAGGGT
|
178 |
+
AAGGTA
|
179 |
+
AAGGTC
|
180 |
+
AAGGTG
|
181 |
+
AAGGTT
|
182 |
+
AAGTAA
|
183 |
+
AAGTAC
|
184 |
+
AAGTAG
|
185 |
+
AAGTAT
|
186 |
+
AAGTCA
|
187 |
+
AAGTCC
|
188 |
+
AAGTCG
|
189 |
+
AAGTCT
|
190 |
+
AAGTGA
|
191 |
+
AAGTGC
|
192 |
+
AAGTGG
|
193 |
+
AAGTGT
|
194 |
+
AAGTTA
|
195 |
+
AAGTTC
|
196 |
+
AAGTTG
|
197 |
+
AAGTTT
|
198 |
+
AATAAA
|
199 |
+
AATAAC
|
200 |
+
AATAAG
|
201 |
+
AATAAT
|
202 |
+
AATACA
|
203 |
+
AATACC
|
204 |
+
AATACG
|
205 |
+
AATACT
|
206 |
+
AATAGA
|
207 |
+
AATAGC
|
208 |
+
AATAGG
|
209 |
+
AATAGT
|
210 |
+
AATATA
|
211 |
+
AATATC
|
212 |
+
AATATG
|
213 |
+
AATATT
|
214 |
+
AATCAA
|
215 |
+
AATCAC
|
216 |
+
AATCAG
|
217 |
+
AATCAT
|
218 |
+
AATCCA
|
219 |
+
AATCCC
|
220 |
+
AATCCG
|
221 |
+
AATCCT
|
222 |
+
AATCGA
|
223 |
+
AATCGC
|
224 |
+
AATCGG
|
225 |
+
AATCGT
|
226 |
+
AATCTA
|
227 |
+
AATCTC
|
228 |
+
AATCTG
|
229 |
+
AATCTT
|
230 |
+
AATGAA
|
231 |
+
AATGAC
|
232 |
+
AATGAG
|
233 |
+
AATGAT
|
234 |
+
AATGCA
|
235 |
+
AATGCC
|
236 |
+
AATGCG
|
237 |
+
AATGCT
|
238 |
+
AATGGA
|
239 |
+
AATGGC
|
240 |
+
AATGGG
|
241 |
+
AATGGT
|
242 |
+
AATGTA
|
243 |
+
AATGTC
|
244 |
+
AATGTG
|
245 |
+
AATGTT
|
246 |
+
AATTAA
|
247 |
+
AATTAC
|
248 |
+
AATTAG
|
249 |
+
AATTAT
|
250 |
+
AATTCA
|
251 |
+
AATTCC
|
252 |
+
AATTCG
|
253 |
+
AATTCT
|
254 |
+
AATTGA
|
255 |
+
AATTGC
|
256 |
+
AATTGG
|
257 |
+
AATTGT
|
258 |
+
AATTTA
|
259 |
+
AATTTC
|
260 |
+
AATTTG
|
261 |
+
AATTTT
|
262 |
+
ACAAAA
|
263 |
+
ACAAAC
|
264 |
+
ACAAAG
|
265 |
+
ACAAAT
|
266 |
+
ACAACA
|
267 |
+
ACAACC
|
268 |
+
ACAACG
|
269 |
+
ACAACT
|
270 |
+
ACAAGA
|
271 |
+
ACAAGC
|
272 |
+
ACAAGG
|
273 |
+
ACAAGT
|
274 |
+
ACAATA
|
275 |
+
ACAATC
|
276 |
+
ACAATG
|
277 |
+
ACAATT
|
278 |
+
ACACAA
|
279 |
+
ACACAC
|
280 |
+
ACACAG
|
281 |
+
ACACAT
|
282 |
+
ACACCA
|
283 |
+
ACACCC
|
284 |
+
ACACCG
|
285 |
+
ACACCT
|
286 |
+
ACACGA
|
287 |
+
ACACGC
|
288 |
+
ACACGG
|
289 |
+
ACACGT
|
290 |
+
ACACTA
|
291 |
+
ACACTC
|
292 |
+
ACACTG
|
293 |
+
ACACTT
|
294 |
+
ACAGAA
|
295 |
+
ACAGAC
|
296 |
+
ACAGAG
|
297 |
+
ACAGAT
|
298 |
+
ACAGCA
|
299 |
+
ACAGCC
|
300 |
+
ACAGCG
|
301 |
+
ACAGCT
|
302 |
+
ACAGGA
|
303 |
+
ACAGGC
|
304 |
+
ACAGGG
|
305 |
+
ACAGGT
|
306 |
+
ACAGTA
|
307 |
+
ACAGTC
|
308 |
+
ACAGTG
|
309 |
+
ACAGTT
|
310 |
+
ACATAA
|
311 |
+
ACATAC
|
312 |
+
ACATAG
|
313 |
+
ACATAT
|
314 |
+
ACATCA
|
315 |
+
ACATCC
|
316 |
+
ACATCG
|
317 |
+
ACATCT
|
318 |
+
ACATGA
|
319 |
+
ACATGC
|
320 |
+
ACATGG
|
321 |
+
ACATGT
|
322 |
+
ACATTA
|
323 |
+
ACATTC
|
324 |
+
ACATTG
|
325 |
+
ACATTT
|
326 |
+
ACCAAA
|
327 |
+
ACCAAC
|
328 |
+
ACCAAG
|
329 |
+
ACCAAT
|
330 |
+
ACCACA
|
331 |
+
ACCACC
|
332 |
+
ACCACG
|
333 |
+
ACCACT
|
334 |
+
ACCAGA
|
335 |
+
ACCAGC
|
336 |
+
ACCAGG
|
337 |
+
ACCAGT
|
338 |
+
ACCATA
|
339 |
+
ACCATC
|
340 |
+
ACCATG
|
341 |
+
ACCATT
|
342 |
+
ACCCAA
|
343 |
+
ACCCAC
|
344 |
+
ACCCAG
|
345 |
+
ACCCAT
|
346 |
+
ACCCCA
|
347 |
+
ACCCCC
|
348 |
+
ACCCCG
|
349 |
+
ACCCCT
|
350 |
+
ACCCGA
|
351 |
+
ACCCGC
|
352 |
+
ACCCGG
|
353 |
+
ACCCGT
|
354 |
+
ACCCTA
|
355 |
+
ACCCTC
|
356 |
+
ACCCTG
|
357 |
+
ACCCTT
|
358 |
+
ACCGAA
|
359 |
+
ACCGAC
|
360 |
+
ACCGAG
|
361 |
+
ACCGAT
|
362 |
+
ACCGCA
|
363 |
+
ACCGCC
|
364 |
+
ACCGCG
|
365 |
+
ACCGCT
|
366 |
+
ACCGGA
|
367 |
+
ACCGGC
|
368 |
+
ACCGGG
|
369 |
+
ACCGGT
|
370 |
+
ACCGTA
|
371 |
+
ACCGTC
|
372 |
+
ACCGTG
|
373 |
+
ACCGTT
|
374 |
+
ACCTAA
|
375 |
+
ACCTAC
|
376 |
+
ACCTAG
|
377 |
+
ACCTAT
|
378 |
+
ACCTCA
|
379 |
+
ACCTCC
|
380 |
+
ACCTCG
|
381 |
+
ACCTCT
|
382 |
+
ACCTGA
|
383 |
+
ACCTGC
|
384 |
+
ACCTGG
|
385 |
+
ACCTGT
|
386 |
+
ACCTTA
|
387 |
+
ACCTTC
|
388 |
+
ACCTTG
|
389 |
+
ACCTTT
|
390 |
+
ACGAAA
|
391 |
+
ACGAAC
|
392 |
+
ACGAAG
|
393 |
+
ACGAAT
|
394 |
+
ACGACA
|
395 |
+
ACGACC
|
396 |
+
ACGACG
|
397 |
+
ACGACT
|
398 |
+
ACGAGA
|
399 |
+
ACGAGC
|
400 |
+
ACGAGG
|
401 |
+
ACGAGT
|
402 |
+
ACGATA
|
403 |
+
ACGATC
|
404 |
+
ACGATG
|
405 |
+
ACGATT
|
406 |
+
ACGCAA
|
407 |
+
ACGCAC
|
408 |
+
ACGCAG
|
409 |
+
ACGCAT
|
410 |
+
ACGCCA
|
411 |
+
ACGCCC
|
412 |
+
ACGCCG
|
413 |
+
ACGCCT
|
414 |
+
ACGCGA
|
415 |
+
ACGCGC
|
416 |
+
ACGCGG
|
417 |
+
ACGCGT
|
418 |
+
ACGCTA
|
419 |
+
ACGCTC
|
420 |
+
ACGCTG
|
421 |
+
ACGCTT
|
422 |
+
ACGGAA
|
423 |
+
ACGGAC
|
424 |
+
ACGGAG
|
425 |
+
ACGGAT
|
426 |
+
ACGGCA
|
427 |
+
ACGGCC
|
428 |
+
ACGGCG
|
429 |
+
ACGGCT
|
430 |
+
ACGGGA
|
431 |
+
ACGGGC
|
432 |
+
ACGGGG
|
433 |
+
ACGGGT
|
434 |
+
ACGGTA
|
435 |
+
ACGGTC
|
436 |
+
ACGGTG
|
437 |
+
ACGGTT
|
438 |
+
ACGTAA
|
439 |
+
ACGTAC
|
440 |
+
ACGTAG
|
441 |
+
ACGTAT
|
442 |
+
ACGTCA
|
443 |
+
ACGTCC
|
444 |
+
ACGTCG
|
445 |
+
ACGTCT
|
446 |
+
ACGTGA
|
447 |
+
ACGTGC
|
448 |
+
ACGTGG
|
449 |
+
ACGTGT
|
450 |
+
ACGTTA
|
451 |
+
ACGTTC
|
452 |
+
ACGTTG
|
453 |
+
ACGTTT
|
454 |
+
ACTAAA
|
455 |
+
ACTAAC
|
456 |
+
ACTAAG
|
457 |
+
ACTAAT
|
458 |
+
ACTACA
|
459 |
+
ACTACC
|
460 |
+
ACTACG
|
461 |
+
ACTACT
|
462 |
+
ACTAGA
|
463 |
+
ACTAGC
|
464 |
+
ACTAGG
|
465 |
+
ACTAGT
|
466 |
+
ACTATA
|
467 |
+
ACTATC
|
468 |
+
ACTATG
|
469 |
+
ACTATT
|
470 |
+
ACTCAA
|
471 |
+
ACTCAC
|
472 |
+
ACTCAG
|
473 |
+
ACTCAT
|
474 |
+
ACTCCA
|
475 |
+
ACTCCC
|
476 |
+
ACTCCG
|
477 |
+
ACTCCT
|
478 |
+
ACTCGA
|
479 |
+
ACTCGC
|
480 |
+
ACTCGG
|
481 |
+
ACTCGT
|
482 |
+
ACTCTA
|
483 |
+
ACTCTC
|
484 |
+
ACTCTG
|
485 |
+
ACTCTT
|
486 |
+
ACTGAA
|
487 |
+
ACTGAC
|
488 |
+
ACTGAG
|
489 |
+
ACTGAT
|
490 |
+
ACTGCA
|
491 |
+
ACTGCC
|
492 |
+
ACTGCG
|
493 |
+
ACTGCT
|
494 |
+
ACTGGA
|
495 |
+
ACTGGC
|
496 |
+
ACTGGG
|
497 |
+
ACTGGT
|
498 |
+
ACTGTA
|
499 |
+
ACTGTC
|
500 |
+
ACTGTG
|
501 |
+
ACTGTT
|
502 |
+
ACTTAA
|
503 |
+
ACTTAC
|
504 |
+
ACTTAG
|
505 |
+
ACTTAT
|
506 |
+
ACTTCA
|
507 |
+
ACTTCC
|
508 |
+
ACTTCG
|
509 |
+
ACTTCT
|
510 |
+
ACTTGA
|
511 |
+
ACTTGC
|
512 |
+
ACTTGG
|
513 |
+
ACTTGT
|
514 |
+
ACTTTA
|
515 |
+
ACTTTC
|
516 |
+
ACTTTG
|
517 |
+
ACTTTT
|
518 |
+
AGAAAA
|
519 |
+
AGAAAC
|
520 |
+
AGAAAG
|
521 |
+
AGAAAT
|
522 |
+
AGAACA
|
523 |
+
AGAACC
|
524 |
+
AGAACG
|
525 |
+
AGAACT
|
526 |
+
AGAAGA
|
527 |
+
AGAAGC
|
528 |
+
AGAAGG
|
529 |
+
AGAAGT
|
530 |
+
AGAATA
|
531 |
+
AGAATC
|
532 |
+
AGAATG
|
533 |
+
AGAATT
|
534 |
+
AGACAA
|
535 |
+
AGACAC
|
536 |
+
AGACAG
|
537 |
+
AGACAT
|
538 |
+
AGACCA
|
539 |
+
AGACCC
|
540 |
+
AGACCG
|
541 |
+
AGACCT
|
542 |
+
AGACGA
|
543 |
+
AGACGC
|
544 |
+
AGACGG
|
545 |
+
AGACGT
|
546 |
+
AGACTA
|
547 |
+
AGACTC
|
548 |
+
AGACTG
|
549 |
+
AGACTT
|
550 |
+
AGAGAA
|
551 |
+
AGAGAC
|
552 |
+
AGAGAG
|
553 |
+
AGAGAT
|
554 |
+
AGAGCA
|
555 |
+
AGAGCC
|
556 |
+
AGAGCG
|
557 |
+
AGAGCT
|
558 |
+
AGAGGA
|
559 |
+
AGAGGC
|
560 |
+
AGAGGG
|
561 |
+
AGAGGT
|
562 |
+
AGAGTA
|
563 |
+
AGAGTC
|
564 |
+
AGAGTG
|
565 |
+
AGAGTT
|
566 |
+
AGATAA
|
567 |
+
AGATAC
|
568 |
+
AGATAG
|
569 |
+
AGATAT
|
570 |
+
AGATCA
|
571 |
+
AGATCC
|
572 |
+
AGATCG
|
573 |
+
AGATCT
|
574 |
+
AGATGA
|
575 |
+
AGATGC
|
576 |
+
AGATGG
|
577 |
+
AGATGT
|
578 |
+
AGATTA
|
579 |
+
AGATTC
|
580 |
+
AGATTG
|
581 |
+
AGATTT
|
582 |
+
AGCAAA
|
583 |
+
AGCAAC
|
584 |
+
AGCAAG
|
585 |
+
AGCAAT
|
586 |
+
AGCACA
|
587 |
+
AGCACC
|
588 |
+
AGCACG
|
589 |
+
AGCACT
|
590 |
+
AGCAGA
|
591 |
+
AGCAGC
|
592 |
+
AGCAGG
|
593 |
+
AGCAGT
|
594 |
+
AGCATA
|
595 |
+
AGCATC
|
596 |
+
AGCATG
|
597 |
+
AGCATT
|
598 |
+
AGCCAA
|
599 |
+
AGCCAC
|
600 |
+
AGCCAG
|
601 |
+
AGCCAT
|
602 |
+
AGCCCA
|
603 |
+
AGCCCC
|
604 |
+
AGCCCG
|
605 |
+
AGCCCT
|
606 |
+
AGCCGA
|
607 |
+
AGCCGC
|
608 |
+
AGCCGG
|
609 |
+
AGCCGT
|
610 |
+
AGCCTA
|
611 |
+
AGCCTC
|
612 |
+
AGCCTG
|
613 |
+
AGCCTT
|
614 |
+
AGCGAA
|
615 |
+
AGCGAC
|
616 |
+
AGCGAG
|
617 |
+
AGCGAT
|
618 |
+
AGCGCA
|
619 |
+
AGCGCC
|
620 |
+
AGCGCG
|
621 |
+
AGCGCT
|
622 |
+
AGCGGA
|
623 |
+
AGCGGC
|
624 |
+
AGCGGG
|
625 |
+
AGCGGT
|
626 |
+
AGCGTA
|
627 |
+
AGCGTC
|
628 |
+
AGCGTG
|
629 |
+
AGCGTT
|
630 |
+
AGCTAA
|
631 |
+
AGCTAC
|
632 |
+
AGCTAG
|
633 |
+
AGCTAT
|
634 |
+
AGCTCA
|
635 |
+
AGCTCC
|
636 |
+
AGCTCG
|
637 |
+
AGCTCT
|
638 |
+
AGCTGA
|
639 |
+
AGCTGC
|
640 |
+
AGCTGG
|
641 |
+
AGCTGT
|
642 |
+
AGCTTA
|
643 |
+
AGCTTC
|
644 |
+
AGCTTG
|
645 |
+
AGCTTT
|
646 |
+
AGGAAA
|
647 |
+
AGGAAC
|
648 |
+
AGGAAG
|
649 |
+
AGGAAT
|
650 |
+
AGGACA
|
651 |
+
AGGACC
|
652 |
+
AGGACG
|
653 |
+
AGGACT
|
654 |
+
AGGAGA
|
655 |
+
AGGAGC
|
656 |
+
AGGAGG
|
657 |
+
AGGAGT
|
658 |
+
AGGATA
|
659 |
+
AGGATC
|
660 |
+
AGGATG
|
661 |
+
AGGATT
|
662 |
+
AGGCAA
|
663 |
+
AGGCAC
|
664 |
+
AGGCAG
|
665 |
+
AGGCAT
|
666 |
+
AGGCCA
|
667 |
+
AGGCCC
|
668 |
+
AGGCCG
|
669 |
+
AGGCCT
|
670 |
+
AGGCGA
|
671 |
+
AGGCGC
|
672 |
+
AGGCGG
|
673 |
+
AGGCGT
|
674 |
+
AGGCTA
|
675 |
+
AGGCTC
|
676 |
+
AGGCTG
|
677 |
+
AGGCTT
|
678 |
+
AGGGAA
|
679 |
+
AGGGAC
|
680 |
+
AGGGAG
|
681 |
+
AGGGAT
|
682 |
+
AGGGCA
|
683 |
+
AGGGCC
|
684 |
+
AGGGCG
|
685 |
+
AGGGCT
|
686 |
+
AGGGGA
|
687 |
+
AGGGGC
|
688 |
+
AGGGGG
|
689 |
+
AGGGGT
|
690 |
+
AGGGTA
|
691 |
+
AGGGTC
|
692 |
+
AGGGTG
|
693 |
+
AGGGTT
|
694 |
+
AGGTAA
|
695 |
+
AGGTAC
|
696 |
+
AGGTAG
|
697 |
+
AGGTAT
|
698 |
+
AGGTCA
|
699 |
+
AGGTCC
|
700 |
+
AGGTCG
|
701 |
+
AGGTCT
|
702 |
+
AGGTGA
|
703 |
+
AGGTGC
|
704 |
+
AGGTGG
|
705 |
+
AGGTGT
|
706 |
+
AGGTTA
|
707 |
+
AGGTTC
|
708 |
+
AGGTTG
|
709 |
+
AGGTTT
|
710 |
+
AGTAAA
|
711 |
+
AGTAAC
|
712 |
+
AGTAAG
|
713 |
+
AGTAAT
|
714 |
+
AGTACA
|
715 |
+
AGTACC
|
716 |
+
AGTACG
|
717 |
+
AGTACT
|
718 |
+
AGTAGA
|
719 |
+
AGTAGC
|
720 |
+
AGTAGG
|
721 |
+
AGTAGT
|
722 |
+
AGTATA
|
723 |
+
AGTATC
|
724 |
+
AGTATG
|
725 |
+
AGTATT
|
726 |
+
AGTCAA
|
727 |
+
AGTCAC
|
728 |
+
AGTCAG
|
729 |
+
AGTCAT
|
730 |
+
AGTCCA
|
731 |
+
AGTCCC
|
732 |
+
AGTCCG
|
733 |
+
AGTCCT
|
734 |
+
AGTCGA
|
735 |
+
AGTCGC
|
736 |
+
AGTCGG
|
737 |
+
AGTCGT
|
738 |
+
AGTCTA
|
739 |
+
AGTCTC
|
740 |
+
AGTCTG
|
741 |
+
AGTCTT
|
742 |
+
AGTGAA
|
743 |
+
AGTGAC
|
744 |
+
AGTGAG
|
745 |
+
AGTGAT
|
746 |
+
AGTGCA
|
747 |
+
AGTGCC
|
748 |
+
AGTGCG
|
749 |
+
AGTGCT
|
750 |
+
AGTGGA
|
751 |
+
AGTGGC
|
752 |
+
AGTGGG
|
753 |
+
AGTGGT
|
754 |
+
AGTGTA
|
755 |
+
AGTGTC
|
756 |
+
AGTGTG
|
757 |
+
AGTGTT
|
758 |
+
AGTTAA
|
759 |
+
AGTTAC
|
760 |
+
AGTTAG
|
761 |
+
AGTTAT
|
762 |
+
AGTTCA
|
763 |
+
AGTTCC
|
764 |
+
AGTTCG
|
765 |
+
AGTTCT
|
766 |
+
AGTTGA
|
767 |
+
AGTTGC
|
768 |
+
AGTTGG
|
769 |
+
AGTTGT
|
770 |
+
AGTTTA
|
771 |
+
AGTTTC
|
772 |
+
AGTTTG
|
773 |
+
AGTTTT
|
774 |
+
ATAAAA
|
775 |
+
ATAAAC
|
776 |
+
ATAAAG
|
777 |
+
ATAAAT
|
778 |
+
ATAACA
|
779 |
+
ATAACC
|
780 |
+
ATAACG
|
781 |
+
ATAACT
|
782 |
+
ATAAGA
|
783 |
+
ATAAGC
|
784 |
+
ATAAGG
|
785 |
+
ATAAGT
|
786 |
+
ATAATA
|
787 |
+
ATAATC
|
788 |
+
ATAATG
|
789 |
+
ATAATT
|
790 |
+
ATACAA
|
791 |
+
ATACAC
|
792 |
+
ATACAG
|
793 |
+
ATACAT
|
794 |
+
ATACCA
|
795 |
+
ATACCC
|
796 |
+
ATACCG
|
797 |
+
ATACCT
|
798 |
+
ATACGA
|
799 |
+
ATACGC
|
800 |
+
ATACGG
|
801 |
+
ATACGT
|
802 |
+
ATACTA
|
803 |
+
ATACTC
|
804 |
+
ATACTG
|
805 |
+
ATACTT
|
806 |
+
ATAGAA
|
807 |
+
ATAGAC
|
808 |
+
ATAGAG
|
809 |
+
ATAGAT
|
810 |
+
ATAGCA
|
811 |
+
ATAGCC
|
812 |
+
ATAGCG
|
813 |
+
ATAGCT
|
814 |
+
ATAGGA
|
815 |
+
ATAGGC
|
816 |
+
ATAGGG
|
817 |
+
ATAGGT
|
818 |
+
ATAGTA
|
819 |
+
ATAGTC
|
820 |
+
ATAGTG
|
821 |
+
ATAGTT
|
822 |
+
ATATAA
|
823 |
+
ATATAC
|
824 |
+
ATATAG
|
825 |
+
ATATAT
|
826 |
+
ATATCA
|
827 |
+
ATATCC
|
828 |
+
ATATCG
|
829 |
+
ATATCT
|
830 |
+
ATATGA
|
831 |
+
ATATGC
|
832 |
+
ATATGG
|
833 |
+
ATATGT
|
834 |
+
ATATTA
|
835 |
+
ATATTC
|
836 |
+
ATATTG
|
837 |
+
ATATTT
|
838 |
+
ATCAAA
|
839 |
+
ATCAAC
|
840 |
+
ATCAAG
|
841 |
+
ATCAAT
|
842 |
+
ATCACA
|
843 |
+
ATCACC
|
844 |
+
ATCACG
|
845 |
+
ATCACT
|
846 |
+
ATCAGA
|
847 |
+
ATCAGC
|
848 |
+
ATCAGG
|
849 |
+
ATCAGT
|
850 |
+
ATCATA
|
851 |
+
ATCATC
|
852 |
+
ATCATG
|
853 |
+
ATCATT
|
854 |
+
ATCCAA
|
855 |
+
ATCCAC
|
856 |
+
ATCCAG
|
857 |
+
ATCCAT
|
858 |
+
ATCCCA
|
859 |
+
ATCCCC
|
860 |
+
ATCCCG
|
861 |
+
ATCCCT
|
862 |
+
ATCCGA
|
863 |
+
ATCCGC
|
864 |
+
ATCCGG
|
865 |
+
ATCCGT
|
866 |
+
ATCCTA
|
867 |
+
ATCCTC
|
868 |
+
ATCCTG
|
869 |
+
ATCCTT
|
870 |
+
ATCGAA
|
871 |
+
ATCGAC
|
872 |
+
ATCGAG
|
873 |
+
ATCGAT
|
874 |
+
ATCGCA
|
875 |
+
ATCGCC
|
876 |
+
ATCGCG
|
877 |
+
ATCGCT
|
878 |
+
ATCGGA
|
879 |
+
ATCGGC
|
880 |
+
ATCGGG
|
881 |
+
ATCGGT
|
882 |
+
ATCGTA
|
883 |
+
ATCGTC
|
884 |
+
ATCGTG
|
885 |
+
ATCGTT
|
886 |
+
ATCTAA
|
887 |
+
ATCTAC
|
888 |
+
ATCTAG
|
889 |
+
ATCTAT
|
890 |
+
ATCTCA
|
891 |
+
ATCTCC
|
892 |
+
ATCTCG
|
893 |
+
ATCTCT
|
894 |
+
ATCTGA
|
895 |
+
ATCTGC
|
896 |
+
ATCTGG
|
897 |
+
ATCTGT
|
898 |
+
ATCTTA
|
899 |
+
ATCTTC
|
900 |
+
ATCTTG
|
901 |
+
ATCTTT
|
902 |
+
ATGAAA
|
903 |
+
ATGAAC
|
904 |
+
ATGAAG
|
905 |
+
ATGAAT
|
906 |
+
ATGACA
|
907 |
+
ATGACC
|
908 |
+
ATGACG
|
909 |
+
ATGACT
|
910 |
+
ATGAGA
|
911 |
+
ATGAGC
|
912 |
+
ATGAGG
|
913 |
+
ATGAGT
|
914 |
+
ATGATA
|
915 |
+
ATGATC
|
916 |
+
ATGATG
|
917 |
+
ATGATT
|
918 |
+
ATGCAA
|
919 |
+
ATGCAC
|
920 |
+
ATGCAG
|
921 |
+
ATGCAT
|
922 |
+
ATGCCA
|
923 |
+
ATGCCC
|
924 |
+
ATGCCG
|
925 |
+
ATGCCT
|
926 |
+
ATGCGA
|
927 |
+
ATGCGC
|
928 |
+
ATGCGG
|
929 |
+
ATGCGT
|
930 |
+
ATGCTA
|
931 |
+
ATGCTC
|
932 |
+
ATGCTG
|
933 |
+
ATGCTT
|
934 |
+
ATGGAA
|
935 |
+
ATGGAC
|
936 |
+
ATGGAG
|
937 |
+
ATGGAT
|
938 |
+
ATGGCA
|
939 |
+
ATGGCC
|
940 |
+
ATGGCG
|
941 |
+
ATGGCT
|
942 |
+
ATGGGA
|
943 |
+
ATGGGC
|
944 |
+
ATGGGG
|
945 |
+
ATGGGT
|
946 |
+
ATGGTA
|
947 |
+
ATGGTC
|
948 |
+
ATGGTG
|
949 |
+
ATGGTT
|
950 |
+
ATGTAA
|
951 |
+
ATGTAC
|
952 |
+
ATGTAG
|
953 |
+
ATGTAT
|
954 |
+
ATGTCA
|
955 |
+
ATGTCC
|
956 |
+
ATGTCG
|
957 |
+
ATGTCT
|
958 |
+
ATGTGA
|
959 |
+
ATGTGC
|
960 |
+
ATGTGG
|
961 |
+
ATGTGT
|
962 |
+
ATGTTA
|
963 |
+
ATGTTC
|
964 |
+
ATGTTG
|
965 |
+
ATGTTT
|
966 |
+
ATTAAA
|
967 |
+
ATTAAC
|
968 |
+
ATTAAG
|
969 |
+
ATTAAT
|
970 |
+
ATTACA
|
971 |
+
ATTACC
|
972 |
+
ATTACG
|
973 |
+
ATTACT
|
974 |
+
ATTAGA
|
975 |
+
ATTAGC
|
976 |
+
ATTAGG
|
977 |
+
ATTAGT
|
978 |
+
ATTATA
|
979 |
+
ATTATC
|
980 |
+
ATTATG
|
981 |
+
ATTATT
|
982 |
+
ATTCAA
|
983 |
+
ATTCAC
|
984 |
+
ATTCAG
|
985 |
+
ATTCAT
|
986 |
+
ATTCCA
|
987 |
+
ATTCCC
|
988 |
+
ATTCCG
|
989 |
+
ATTCCT
|
990 |
+
ATTCGA
|
991 |
+
ATTCGC
|
992 |
+
ATTCGG
|
993 |
+
ATTCGT
|
994 |
+
ATTCTA
|
995 |
+
ATTCTC
|
996 |
+
ATTCTG
|
997 |
+
ATTCTT
|
998 |
+
ATTGAA
|
999 |
+
ATTGAC
|
1000 |
+
ATTGAG
|
1001 |
+
ATTGAT
|
1002 |
+
ATTGCA
|
1003 |
+
ATTGCC
|
1004 |
+
ATTGCG
|
1005 |
+
ATTGCT
|
1006 |
+
ATTGGA
|
1007 |
+
ATTGGC
|
1008 |
+
ATTGGG
|
1009 |
+
ATTGGT
|
1010 |
+
ATTGTA
|
1011 |
+
ATTGTC
|
1012 |
+
ATTGTG
|
1013 |
+
ATTGTT
|
1014 |
+
ATTTAA
|
1015 |
+
ATTTAC
|
1016 |
+
ATTTAG
|
1017 |
+
ATTTAT
|
1018 |
+
ATTTCA
|
1019 |
+
ATTTCC
|
1020 |
+
ATTTCG
|
1021 |
+
ATTTCT
|
1022 |
+
ATTTGA
|
1023 |
+
ATTTGC
|
1024 |
+
ATTTGG
|
1025 |
+
ATTTGT
|
1026 |
+
ATTTTA
|
1027 |
+
ATTTTC
|
1028 |
+
ATTTTG
|
1029 |
+
ATTTTT
|
1030 |
+
CAAAAA
|
1031 |
+
CAAAAC
|
1032 |
+
CAAAAG
|
1033 |
+
CAAAAT
|
1034 |
+
CAAACA
|
1035 |
+
CAAACC
|
1036 |
+
CAAACG
|
1037 |
+
CAAACT
|
1038 |
+
CAAAGA
|
1039 |
+
CAAAGC
|
1040 |
+
CAAAGG
|
1041 |
+
CAAAGT
|
1042 |
+
CAAATA
|
1043 |
+
CAAATC
|
1044 |
+
CAAATG
|
1045 |
+
CAAATT
|
1046 |
+
CAACAA
|
1047 |
+
CAACAC
|
1048 |
+
CAACAG
|
1049 |
+
CAACAT
|
1050 |
+
CAACCA
|
1051 |
+
CAACCC
|
1052 |
+
CAACCG
|
1053 |
+
CAACCT
|
1054 |
+
CAACGA
|
1055 |
+
CAACGC
|
1056 |
+
CAACGG
|
1057 |
+
CAACGT
|
1058 |
+
CAACTA
|
1059 |
+
CAACTC
|
1060 |
+
CAACTG
|
1061 |
+
CAACTT
|
1062 |
+
CAAGAA
|
1063 |
+
CAAGAC
|
1064 |
+
CAAGAG
|
1065 |
+
CAAGAT
|
1066 |
+
CAAGCA
|
1067 |
+
CAAGCC
|
1068 |
+
CAAGCG
|
1069 |
+
CAAGCT
|
1070 |
+
CAAGGA
|
1071 |
+
CAAGGC
|
1072 |
+
CAAGGG
|
1073 |
+
CAAGGT
|
1074 |
+
CAAGTA
|
1075 |
+
CAAGTC
|
1076 |
+
CAAGTG
|
1077 |
+
CAAGTT
|
1078 |
+
CAATAA
|
1079 |
+
CAATAC
|
1080 |
+
CAATAG
|
1081 |
+
CAATAT
|
1082 |
+
CAATCA
|
1083 |
+
CAATCC
|
1084 |
+
CAATCG
|
1085 |
+
CAATCT
|
1086 |
+
CAATGA
|
1087 |
+
CAATGC
|
1088 |
+
CAATGG
|
1089 |
+
CAATGT
|
1090 |
+
CAATTA
|
1091 |
+
CAATTC
|
1092 |
+
CAATTG
|
1093 |
+
CAATTT
|
1094 |
+
CACAAA
|
1095 |
+
CACAAC
|
1096 |
+
CACAAG
|
1097 |
+
CACAAT
|
1098 |
+
CACACA
|
1099 |
+
CACACC
|
1100 |
+
CACACG
|
1101 |
+
CACACT
|
1102 |
+
CACAGA
|
1103 |
+
CACAGC
|
1104 |
+
CACAGG
|
1105 |
+
CACAGT
|
1106 |
+
CACATA
|
1107 |
+
CACATC
|
1108 |
+
CACATG
|
1109 |
+
CACATT
|
1110 |
+
CACCAA
|
1111 |
+
CACCAC
|
1112 |
+
CACCAG
|
1113 |
+
CACCAT
|
1114 |
+
CACCCA
|
1115 |
+
CACCCC
|
1116 |
+
CACCCG
|
1117 |
+
CACCCT
|
1118 |
+
CACCGA
|
1119 |
+
CACCGC
|
1120 |
+
CACCGG
|
1121 |
+
CACCGT
|
1122 |
+
CACCTA
|
1123 |
+
CACCTC
|
1124 |
+
CACCTG
|
1125 |
+
CACCTT
|
1126 |
+
CACGAA
|
1127 |
+
CACGAC
|
1128 |
+
CACGAG
|
1129 |
+
CACGAT
|
1130 |
+
CACGCA
|
1131 |
+
CACGCC
|
1132 |
+
CACGCG
|
1133 |
+
CACGCT
|
1134 |
+
CACGGA
|
1135 |
+
CACGGC
|
1136 |
+
CACGGG
|
1137 |
+
CACGGT
|
1138 |
+
CACGTA
|
1139 |
+
CACGTC
|
1140 |
+
CACGTG
|
1141 |
+
CACGTT
|
1142 |
+
CACTAA
|
1143 |
+
CACTAC
|
1144 |
+
CACTAG
|
1145 |
+
CACTAT
|
1146 |
+
CACTCA
|
1147 |
+
CACTCC
|
1148 |
+
CACTCG
|
1149 |
+
CACTCT
|
1150 |
+
CACTGA
|
1151 |
+
CACTGC
|
1152 |
+
CACTGG
|
1153 |
+
CACTGT
|
1154 |
+
CACTTA
|
1155 |
+
CACTTC
|
1156 |
+
CACTTG
|
1157 |
+
CACTTT
|
1158 |
+
CAGAAA
|
1159 |
+
CAGAAC
|
1160 |
+
CAGAAG
|
1161 |
+
CAGAAT
|
1162 |
+
CAGACA
|
1163 |
+
CAGACC
|
1164 |
+
CAGACG
|
1165 |
+
CAGACT
|
1166 |
+
CAGAGA
|
1167 |
+
CAGAGC
|
1168 |
+
CAGAGG
|
1169 |
+
CAGAGT
|
1170 |
+
CAGATA
|
1171 |
+
CAGATC
|
1172 |
+
CAGATG
|
1173 |
+
CAGATT
|
1174 |
+
CAGCAA
|
1175 |
+
CAGCAC
|
1176 |
+
CAGCAG
|
1177 |
+
CAGCAT
|
1178 |
+
CAGCCA
|
1179 |
+
CAGCCC
|
1180 |
+
CAGCCG
|
1181 |
+
CAGCCT
|
1182 |
+
CAGCGA
|
1183 |
+
CAGCGC
|
1184 |
+
CAGCGG
|
1185 |
+
CAGCGT
|
1186 |
+
CAGCTA
|
1187 |
+
CAGCTC
|
1188 |
+
CAGCTG
|
1189 |
+
CAGCTT
|
1190 |
+
CAGGAA
|
1191 |
+
CAGGAC
|
1192 |
+
CAGGAG
|
1193 |
+
CAGGAT
|
1194 |
+
CAGGCA
|
1195 |
+
CAGGCC
|
1196 |
+
CAGGCG
|
1197 |
+
CAGGCT
|
1198 |
+
CAGGGA
|
1199 |
+
CAGGGC
|
1200 |
+
CAGGGG
|
1201 |
+
CAGGGT
|
1202 |
+
CAGGTA
|
1203 |
+
CAGGTC
|
1204 |
+
CAGGTG
|
1205 |
+
CAGGTT
|
1206 |
+
CAGTAA
|
1207 |
+
CAGTAC
|
1208 |
+
CAGTAG
|
1209 |
+
CAGTAT
|
1210 |
+
CAGTCA
|
1211 |
+
CAGTCC
|
1212 |
+
CAGTCG
|
1213 |
+
CAGTCT
|
1214 |
+
CAGTGA
|
1215 |
+
CAGTGC
|
1216 |
+
CAGTGG
|
1217 |
+
CAGTGT
|
1218 |
+
CAGTTA
|
1219 |
+
CAGTTC
|
1220 |
+
CAGTTG
|
1221 |
+
CAGTTT
|
1222 |
+
CATAAA
|
1223 |
+
CATAAC
|
1224 |
+
CATAAG
|
1225 |
+
CATAAT
|
1226 |
+
CATACA
|
1227 |
+
CATACC
|
1228 |
+
CATACG
|
1229 |
+
CATACT
|
1230 |
+
CATAGA
|
1231 |
+
CATAGC
|
1232 |
+
CATAGG
|
1233 |
+
CATAGT
|
1234 |
+
CATATA
|
1235 |
+
CATATC
|
1236 |
+
CATATG
|
1237 |
+
CATATT
|
1238 |
+
CATCAA
|
1239 |
+
CATCAC
|
1240 |
+
CATCAG
|
1241 |
+
CATCAT
|
1242 |
+
CATCCA
|
1243 |
+
CATCCC
|
1244 |
+
CATCCG
|
1245 |
+
CATCCT
|
1246 |
+
CATCGA
|
1247 |
+
CATCGC
|
1248 |
+
CATCGG
|
1249 |
+
CATCGT
|
1250 |
+
CATCTA
|
1251 |
+
CATCTC
|
1252 |
+
CATCTG
|
1253 |
+
CATCTT
|
1254 |
+
CATGAA
|
1255 |
+
CATGAC
|
1256 |
+
CATGAG
|
1257 |
+
CATGAT
|
1258 |
+
CATGCA
|
1259 |
+
CATGCC
|
1260 |
+
CATGCG
|
1261 |
+
CATGCT
|
1262 |
+
CATGGA
|
1263 |
+
CATGGC
|
1264 |
+
CATGGG
|
1265 |
+
CATGGT
|
1266 |
+
CATGTA
|
1267 |
+
CATGTC
|
1268 |
+
CATGTG
|
1269 |
+
CATGTT
|
1270 |
+
CATTAA
|
1271 |
+
CATTAC
|
1272 |
+
CATTAG
|
1273 |
+
CATTAT
|
1274 |
+
CATTCA
|
1275 |
+
CATTCC
|
1276 |
+
CATTCG
|
1277 |
+
CATTCT
|
1278 |
+
CATTGA
|
1279 |
+
CATTGC
|
1280 |
+
CATTGG
|
1281 |
+
CATTGT
|
1282 |
+
CATTTA
|
1283 |
+
CATTTC
|
1284 |
+
CATTTG
|
1285 |
+
CATTTT
|
1286 |
+
CCAAAA
|
1287 |
+
CCAAAC
|
1288 |
+
CCAAAG
|
1289 |
+
CCAAAT
|
1290 |
+
CCAACA
|
1291 |
+
CCAACC
|
1292 |
+
CCAACG
|
1293 |
+
CCAACT
|
1294 |
+
CCAAGA
|
1295 |
+
CCAAGC
|
1296 |
+
CCAAGG
|
1297 |
+
CCAAGT
|
1298 |
+
CCAATA
|
1299 |
+
CCAATC
|
1300 |
+
CCAATG
|
1301 |
+
CCAATT
|
1302 |
+
CCACAA
|
1303 |
+
CCACAC
|
1304 |
+
CCACAG
|
1305 |
+
CCACAT
|
1306 |
+
CCACCA
|
1307 |
+
CCACCC
|
1308 |
+
CCACCG
|
1309 |
+
CCACCT
|
1310 |
+
CCACGA
|
1311 |
+
CCACGC
|
1312 |
+
CCACGG
|
1313 |
+
CCACGT
|
1314 |
+
CCACTA
|
1315 |
+
CCACTC
|
1316 |
+
CCACTG
|
1317 |
+
CCACTT
|
1318 |
+
CCAGAA
|
1319 |
+
CCAGAC
|
1320 |
+
CCAGAG
|
1321 |
+
CCAGAT
|
1322 |
+
CCAGCA
|
1323 |
+
CCAGCC
|
1324 |
+
CCAGCG
|
1325 |
+
CCAGCT
|
1326 |
+
CCAGGA
|
1327 |
+
CCAGGC
|
1328 |
+
CCAGGG
|
1329 |
+
CCAGGT
|
1330 |
+
CCAGTA
|
1331 |
+
CCAGTC
|
1332 |
+
CCAGTG
|
1333 |
+
CCAGTT
|
1334 |
+
CCATAA
|
1335 |
+
CCATAC
|
1336 |
+
CCATAG
|
1337 |
+
CCATAT
|
1338 |
+
CCATCA
|
1339 |
+
CCATCC
|
1340 |
+
CCATCG
|
1341 |
+
CCATCT
|
1342 |
+
CCATGA
|
1343 |
+
CCATGC
|
1344 |
+
CCATGG
|
1345 |
+
CCATGT
|
1346 |
+
CCATTA
|
1347 |
+
CCATTC
|
1348 |
+
CCATTG
|
1349 |
+
CCATTT
|
1350 |
+
CCCAAA
|
1351 |
+
CCCAAC
|
1352 |
+
CCCAAG
|
1353 |
+
CCCAAT
|
1354 |
+
CCCACA
|
1355 |
+
CCCACC
|
1356 |
+
CCCACG
|
1357 |
+
CCCACT
|
1358 |
+
CCCAGA
|
1359 |
+
CCCAGC
|
1360 |
+
CCCAGG
|
1361 |
+
CCCAGT
|
1362 |
+
CCCATA
|
1363 |
+
CCCATC
|
1364 |
+
CCCATG
|
1365 |
+
CCCATT
|
1366 |
+
CCCCAA
|
1367 |
+
CCCCAC
|
1368 |
+
CCCCAG
|
1369 |
+
CCCCAT
|
1370 |
+
CCCCCA
|
1371 |
+
CCCCCC
|
1372 |
+
CCCCCG
|
1373 |
+
CCCCCT
|
1374 |
+
CCCCGA
|
1375 |
+
CCCCGC
|
1376 |
+
CCCCGG
|
1377 |
+
CCCCGT
|
1378 |
+
CCCCTA
|
1379 |
+
CCCCTC
|
1380 |
+
CCCCTG
|
1381 |
+
CCCCTT
|
1382 |
+
CCCGAA
|
1383 |
+
CCCGAC
|
1384 |
+
CCCGAG
|
1385 |
+
CCCGAT
|
1386 |
+
CCCGCA
|
1387 |
+
CCCGCC
|
1388 |
+
CCCGCG
|
1389 |
+
CCCGCT
|
1390 |
+
CCCGGA
|
1391 |
+
CCCGGC
|
1392 |
+
CCCGGG
|
1393 |
+
CCCGGT
|
1394 |
+
CCCGTA
|
1395 |
+
CCCGTC
|
1396 |
+
CCCGTG
|
1397 |
+
CCCGTT
|
1398 |
+
CCCTAA
|
1399 |
+
CCCTAC
|
1400 |
+
CCCTAG
|
1401 |
+
CCCTAT
|
1402 |
+
CCCTCA
|
1403 |
+
CCCTCC
|
1404 |
+
CCCTCG
|
1405 |
+
CCCTCT
|
1406 |
+
CCCTGA
|
1407 |
+
CCCTGC
|
1408 |
+
CCCTGG
|
1409 |
+
CCCTGT
|
1410 |
+
CCCTTA
|
1411 |
+
CCCTTC
|
1412 |
+
CCCTTG
|
1413 |
+
CCCTTT
|
1414 |
+
CCGAAA
|
1415 |
+
CCGAAC
|
1416 |
+
CCGAAG
|
1417 |
+
CCGAAT
|
1418 |
+
CCGACA
|
1419 |
+
CCGACC
|
1420 |
+
CCGACG
|
1421 |
+
CCGACT
|
1422 |
+
CCGAGA
|
1423 |
+
CCGAGC
|
1424 |
+
CCGAGG
|
1425 |
+
CCGAGT
|
1426 |
+
CCGATA
|
1427 |
+
CCGATC
|
1428 |
+
CCGATG
|
1429 |
+
CCGATT
|
1430 |
+
CCGCAA
|
1431 |
+
CCGCAC
|
1432 |
+
CCGCAG
|
1433 |
+
CCGCAT
|
1434 |
+
CCGCCA
|
1435 |
+
CCGCCC
|
1436 |
+
CCGCCG
|
1437 |
+
CCGCCT
|
1438 |
+
CCGCGA
|
1439 |
+
CCGCGC
|
1440 |
+
CCGCGG
|
1441 |
+
CCGCGT
|
1442 |
+
CCGCTA
|
1443 |
+
CCGCTC
|
1444 |
+
CCGCTG
|
1445 |
+
CCGCTT
|
1446 |
+
CCGGAA
|
1447 |
+
CCGGAC
|
1448 |
+
CCGGAG
|
1449 |
+
CCGGAT
|
1450 |
+
CCGGCA
|
1451 |
+
CCGGCC
|
1452 |
+
CCGGCG
|
1453 |
+
CCGGCT
|
1454 |
+
CCGGGA
|
1455 |
+
CCGGGC
|
1456 |
+
CCGGGG
|
1457 |
+
CCGGGT
|
1458 |
+
CCGGTA
|
1459 |
+
CCGGTC
|
1460 |
+
CCGGTG
|
1461 |
+
CCGGTT
|
1462 |
+
CCGTAA
|
1463 |
+
CCGTAC
|
1464 |
+
CCGTAG
|
1465 |
+
CCGTAT
|
1466 |
+
CCGTCA
|
1467 |
+
CCGTCC
|
1468 |
+
CCGTCG
|
1469 |
+
CCGTCT
|
1470 |
+
CCGTGA
|
1471 |
+
CCGTGC
|
1472 |
+
CCGTGG
|
1473 |
+
CCGTGT
|
1474 |
+
CCGTTA
|
1475 |
+
CCGTTC
|
1476 |
+
CCGTTG
|
1477 |
+
CCGTTT
|
1478 |
+
CCTAAA
|
1479 |
+
CCTAAC
|
1480 |
+
CCTAAG
|
1481 |
+
CCTAAT
|
1482 |
+
CCTACA
|
1483 |
+
CCTACC
|
1484 |
+
CCTACG
|
1485 |
+
CCTACT
|
1486 |
+
CCTAGA
|
1487 |
+
CCTAGC
|
1488 |
+
CCTAGG
|
1489 |
+
CCTAGT
|
1490 |
+
CCTATA
|
1491 |
+
CCTATC
|
1492 |
+
CCTATG
|
1493 |
+
CCTATT
|
1494 |
+
CCTCAA
|
1495 |
+
CCTCAC
|
1496 |
+
CCTCAG
|
1497 |
+
CCTCAT
|
1498 |
+
CCTCCA
|
1499 |
+
CCTCCC
|
1500 |
+
CCTCCG
|
1501 |
+
CCTCCT
|
1502 |
+
CCTCGA
|
1503 |
+
CCTCGC
|
1504 |
+
CCTCGG
|
1505 |
+
CCTCGT
|
1506 |
+
CCTCTA
|
1507 |
+
CCTCTC
|
1508 |
+
CCTCTG
|
1509 |
+
CCTCTT
|
1510 |
+
CCTGAA
|
1511 |
+
CCTGAC
|
1512 |
+
CCTGAG
|
1513 |
+
CCTGAT
|
1514 |
+
CCTGCA
|
1515 |
+
CCTGCC
|
1516 |
+
CCTGCG
|
1517 |
+
CCTGCT
|
1518 |
+
CCTGGA
|
1519 |
+
CCTGGC
|
1520 |
+
CCTGGG
|
1521 |
+
CCTGGT
|
1522 |
+
CCTGTA
|
1523 |
+
CCTGTC
|
1524 |
+
CCTGTG
|
1525 |
+
CCTGTT
|
1526 |
+
CCTTAA
|
1527 |
+
CCTTAC
|
1528 |
+
CCTTAG
|
1529 |
+
CCTTAT
|
1530 |
+
CCTTCA
|
1531 |
+
CCTTCC
|
1532 |
+
CCTTCG
|
1533 |
+
CCTTCT
|
1534 |
+
CCTTGA
|
1535 |
+
CCTTGC
|
1536 |
+
CCTTGG
|
1537 |
+
CCTTGT
|
1538 |
+
CCTTTA
|
1539 |
+
CCTTTC
|
1540 |
+
CCTTTG
|
1541 |
+
CCTTTT
|
1542 |
+
CGAAAA
|
1543 |
+
CGAAAC
|
1544 |
+
CGAAAG
|
1545 |
+
CGAAAT
|
1546 |
+
CGAACA
|
1547 |
+
CGAACC
|
1548 |
+
CGAACG
|
1549 |
+
CGAACT
|
1550 |
+
CGAAGA
|
1551 |
+
CGAAGC
|
1552 |
+
CGAAGG
|
1553 |
+
CGAAGT
|
1554 |
+
CGAATA
|
1555 |
+
CGAATC
|
1556 |
+
CGAATG
|
1557 |
+
CGAATT
|
1558 |
+
CGACAA
|
1559 |
+
CGACAC
|
1560 |
+
CGACAG
|
1561 |
+
CGACAT
|
1562 |
+
CGACCA
|
1563 |
+
CGACCC
|
1564 |
+
CGACCG
|
1565 |
+
CGACCT
|
1566 |
+
CGACGA
|
1567 |
+
CGACGC
|
1568 |
+
CGACGG
|
1569 |
+
CGACGT
|
1570 |
+
CGACTA
|
1571 |
+
CGACTC
|
1572 |
+
CGACTG
|
1573 |
+
CGACTT
|
1574 |
+
CGAGAA
|
1575 |
+
CGAGAC
|
1576 |
+
CGAGAG
|
1577 |
+
CGAGAT
|
1578 |
+
CGAGCA
|
1579 |
+
CGAGCC
|
1580 |
+
CGAGCG
|
1581 |
+
CGAGCT
|
1582 |
+
CGAGGA
|
1583 |
+
CGAGGC
|
1584 |
+
CGAGGG
|
1585 |
+
CGAGGT
|
1586 |
+
CGAGTA
|
1587 |
+
CGAGTC
|
1588 |
+
CGAGTG
|
1589 |
+
CGAGTT
|
1590 |
+
CGATAA
|
1591 |
+
CGATAC
|
1592 |
+
CGATAG
|
1593 |
+
CGATAT
|
1594 |
+
CGATCA
|
1595 |
+
CGATCC
|
1596 |
+
CGATCG
|
1597 |
+
CGATCT
|
1598 |
+
CGATGA
|
1599 |
+
CGATGC
|
1600 |
+
CGATGG
|
1601 |
+
CGATGT
|
1602 |
+
CGATTA
|
1603 |
+
CGATTC
|
1604 |
+
CGATTG
|
1605 |
+
CGATTT
|
1606 |
+
CGCAAA
|
1607 |
+
CGCAAC
|
1608 |
+
CGCAAG
|
1609 |
+
CGCAAT
|
1610 |
+
CGCACA
|
1611 |
+
CGCACC
|
1612 |
+
CGCACG
|
1613 |
+
CGCACT
|
1614 |
+
CGCAGA
|
1615 |
+
CGCAGC
|
1616 |
+
CGCAGG
|
1617 |
+
CGCAGT
|
1618 |
+
CGCATA
|
1619 |
+
CGCATC
|
1620 |
+
CGCATG
|
1621 |
+
CGCATT
|
1622 |
+
CGCCAA
|
1623 |
+
CGCCAC
|
1624 |
+
CGCCAG
|
1625 |
+
CGCCAT
|
1626 |
+
CGCCCA
|
1627 |
+
CGCCCC
|
1628 |
+
CGCCCG
|
1629 |
+
CGCCCT
|
1630 |
+
CGCCGA
|
1631 |
+
CGCCGC
|
1632 |
+
CGCCGG
|
1633 |
+
CGCCGT
|
1634 |
+
CGCCTA
|
1635 |
+
CGCCTC
|
1636 |
+
CGCCTG
|
1637 |
+
CGCCTT
|
1638 |
+
CGCGAA
|
1639 |
+
CGCGAC
|
1640 |
+
CGCGAG
|
1641 |
+
CGCGAT
|
1642 |
+
CGCGCA
|
1643 |
+
CGCGCC
|
1644 |
+
CGCGCG
|
1645 |
+
CGCGCT
|
1646 |
+
CGCGGA
|
1647 |
+
CGCGGC
|
1648 |
+
CGCGGG
|
1649 |
+
CGCGGT
|
1650 |
+
CGCGTA
|
1651 |
+
CGCGTC
|
1652 |
+
CGCGTG
|
1653 |
+
CGCGTT
|
1654 |
+
CGCTAA
|
1655 |
+
CGCTAC
|
1656 |
+
CGCTAG
|
1657 |
+
CGCTAT
|
1658 |
+
CGCTCA
|
1659 |
+
CGCTCC
|
1660 |
+
CGCTCG
|
1661 |
+
CGCTCT
|
1662 |
+
CGCTGA
|
1663 |
+
CGCTGC
|
1664 |
+
CGCTGG
|
1665 |
+
CGCTGT
|
1666 |
+
CGCTTA
|
1667 |
+
CGCTTC
|
1668 |
+
CGCTTG
|
1669 |
+
CGCTTT
|
1670 |
+
CGGAAA
|
1671 |
+
CGGAAC
|
1672 |
+
CGGAAG
|
1673 |
+
CGGAAT
|
1674 |
+
CGGACA
|
1675 |
+
CGGACC
|
1676 |
+
CGGACG
|
1677 |
+
CGGACT
|
1678 |
+
CGGAGA
|
1679 |
+
CGGAGC
|
1680 |
+
CGGAGG
|
1681 |
+
CGGAGT
|
1682 |
+
CGGATA
|
1683 |
+
CGGATC
|
1684 |
+
CGGATG
|
1685 |
+
CGGATT
|
1686 |
+
CGGCAA
|
1687 |
+
CGGCAC
|
1688 |
+
CGGCAG
|
1689 |
+
CGGCAT
|
1690 |
+
CGGCCA
|
1691 |
+
CGGCCC
|
1692 |
+
CGGCCG
|
1693 |
+
CGGCCT
|
1694 |
+
CGGCGA
|
1695 |
+
CGGCGC
|
1696 |
+
CGGCGG
|
1697 |
+
CGGCGT
|
1698 |
+
CGGCTA
|
1699 |
+
CGGCTC
|
1700 |
+
CGGCTG
|
1701 |
+
CGGCTT
|
1702 |
+
CGGGAA
|
1703 |
+
CGGGAC
|
1704 |
+
CGGGAG
|
1705 |
+
CGGGAT
|
1706 |
+
CGGGCA
|
1707 |
+
CGGGCC
|
1708 |
+
CGGGCG
|
1709 |
+
CGGGCT
|
1710 |
+
CGGGGA
|
1711 |
+
CGGGGC
|
1712 |
+
CGGGGG
|
1713 |
+
CGGGGT
|
1714 |
+
CGGGTA
|
1715 |
+
CGGGTC
|
1716 |
+
CGGGTG
|
1717 |
+
CGGGTT
|
1718 |
+
CGGTAA
|
1719 |
+
CGGTAC
|
1720 |
+
CGGTAG
|
1721 |
+
CGGTAT
|
1722 |
+
CGGTCA
|
1723 |
+
CGGTCC
|
1724 |
+
CGGTCG
|
1725 |
+
CGGTCT
|
1726 |
+
CGGTGA
|
1727 |
+
CGGTGC
|
1728 |
+
CGGTGG
|
1729 |
+
CGGTGT
|
1730 |
+
CGGTTA
|
1731 |
+
CGGTTC
|
1732 |
+
CGGTTG
|
1733 |
+
CGGTTT
|
1734 |
+
CGTAAA
|
1735 |
+
CGTAAC
|
1736 |
+
CGTAAG
|
1737 |
+
CGTAAT
|
1738 |
+
CGTACA
|
1739 |
+
CGTACC
|
1740 |
+
CGTACG
|
1741 |
+
CGTACT
|
1742 |
+
CGTAGA
|
1743 |
+
CGTAGC
|
1744 |
+
CGTAGG
|
1745 |
+
CGTAGT
|
1746 |
+
CGTATA
|
1747 |
+
CGTATC
|
1748 |
+
CGTATG
|
1749 |
+
CGTATT
|
1750 |
+
CGTCAA
|
1751 |
+
CGTCAC
|
1752 |
+
CGTCAG
|
1753 |
+
CGTCAT
|
1754 |
+
CGTCCA
|
1755 |
+
CGTCCC
|
1756 |
+
CGTCCG
|
1757 |
+
CGTCCT
|
1758 |
+
CGTCGA
|
1759 |
+
CGTCGC
|
1760 |
+
CGTCGG
|
1761 |
+
CGTCGT
|
1762 |
+
CGTCTA
|
1763 |
+
CGTCTC
|
1764 |
+
CGTCTG
|
1765 |
+
CGTCTT
|
1766 |
+
CGTGAA
|
1767 |
+
CGTGAC
|
1768 |
+
CGTGAG
|
1769 |
+
CGTGAT
|
1770 |
+
CGTGCA
|
1771 |
+
CGTGCC
|
1772 |
+
CGTGCG
|
1773 |
+
CGTGCT
|
1774 |
+
CGTGGA
|
1775 |
+
CGTGGC
|
1776 |
+
CGTGGG
|
1777 |
+
CGTGGT
|
1778 |
+
CGTGTA
|
1779 |
+
CGTGTC
|
1780 |
+
CGTGTG
|
1781 |
+
CGTGTT
|
1782 |
+
CGTTAA
|
1783 |
+
CGTTAC
|
1784 |
+
CGTTAG
|
1785 |
+
CGTTAT
|
1786 |
+
CGTTCA
|
1787 |
+
CGTTCC
|
1788 |
+
CGTTCG
|
1789 |
+
CGTTCT
|
1790 |
+
CGTTGA
|
1791 |
+
CGTTGC
|
1792 |
+
CGTTGG
|
1793 |
+
CGTTGT
|
1794 |
+
CGTTTA
|
1795 |
+
CGTTTC
|
1796 |
+
CGTTTG
|
1797 |
+
CGTTTT
|
1798 |
+
CTAAAA
|
1799 |
+
CTAAAC
|
1800 |
+
CTAAAG
|
1801 |
+
CTAAAT
|
1802 |
+
CTAACA
|
1803 |
+
CTAACC
|
1804 |
+
CTAACG
|
1805 |
+
CTAACT
|
1806 |
+
CTAAGA
|
1807 |
+
CTAAGC
|
1808 |
+
CTAAGG
|
1809 |
+
CTAAGT
|
1810 |
+
CTAATA
|
1811 |
+
CTAATC
|
1812 |
+
CTAATG
|
1813 |
+
CTAATT
|
1814 |
+
CTACAA
|
1815 |
+
CTACAC
|
1816 |
+
CTACAG
|
1817 |
+
CTACAT
|
1818 |
+
CTACCA
|
1819 |
+
CTACCC
|
1820 |
+
CTACCG
|
1821 |
+
CTACCT
|
1822 |
+
CTACGA
|
1823 |
+
CTACGC
|
1824 |
+
CTACGG
|
1825 |
+
CTACGT
|
1826 |
+
CTACTA
|
1827 |
+
CTACTC
|
1828 |
+
CTACTG
|
1829 |
+
CTACTT
|
1830 |
+
CTAGAA
|
1831 |
+
CTAGAC
|
1832 |
+
CTAGAG
|
1833 |
+
CTAGAT
|
1834 |
+
CTAGCA
|
1835 |
+
CTAGCC
|
1836 |
+
CTAGCG
|
1837 |
+
CTAGCT
|
1838 |
+
CTAGGA
|
1839 |
+
CTAGGC
|
1840 |
+
CTAGGG
|
1841 |
+
CTAGGT
|
1842 |
+
CTAGTA
|
1843 |
+
CTAGTC
|
1844 |
+
CTAGTG
|
1845 |
+
CTAGTT
|
1846 |
+
CTATAA
|
1847 |
+
CTATAC
|
1848 |
+
CTATAG
|
1849 |
+
CTATAT
|
1850 |
+
CTATCA
|
1851 |
+
CTATCC
|
1852 |
+
CTATCG
|
1853 |
+
CTATCT
|
1854 |
+
CTATGA
|
1855 |
+
CTATGC
|
1856 |
+
CTATGG
|
1857 |
+
CTATGT
|
1858 |
+
CTATTA
|
1859 |
+
CTATTC
|
1860 |
+
CTATTG
|
1861 |
+
CTATTT
|
1862 |
+
CTCAAA
|
1863 |
+
CTCAAC
|
1864 |
+
CTCAAG
|
1865 |
+
CTCAAT
|
1866 |
+
CTCACA
|
1867 |
+
CTCACC
|
1868 |
+
CTCACG
|
1869 |
+
CTCACT
|
1870 |
+
CTCAGA
|
1871 |
+
CTCAGC
|
1872 |
+
CTCAGG
|
1873 |
+
CTCAGT
|
1874 |
+
CTCATA
|
1875 |
+
CTCATC
|
1876 |
+
CTCATG
|
1877 |
+
CTCATT
|
1878 |
+
CTCCAA
|
1879 |
+
CTCCAC
|
1880 |
+
CTCCAG
|
1881 |
+
CTCCAT
|
1882 |
+
CTCCCA
|
1883 |
+
CTCCCC
|
1884 |
+
CTCCCG
|
1885 |
+
CTCCCT
|
1886 |
+
CTCCGA
|
1887 |
+
CTCCGC
|
1888 |
+
CTCCGG
|
1889 |
+
CTCCGT
|
1890 |
+
CTCCTA
|
1891 |
+
CTCCTC
|
1892 |
+
CTCCTG
|
1893 |
+
CTCCTT
|
1894 |
+
CTCGAA
|
1895 |
+
CTCGAC
|
1896 |
+
CTCGAG
|
1897 |
+
CTCGAT
|
1898 |
+
CTCGCA
|
1899 |
+
CTCGCC
|
1900 |
+
CTCGCG
|
1901 |
+
CTCGCT
|
1902 |
+
CTCGGA
|
1903 |
+
CTCGGC
|
1904 |
+
CTCGGG
|
1905 |
+
CTCGGT
|
1906 |
+
CTCGTA
|
1907 |
+
CTCGTC
|
1908 |
+
CTCGTG
|
1909 |
+
CTCGTT
|
1910 |
+
CTCTAA
|
1911 |
+
CTCTAC
|
1912 |
+
CTCTAG
|
1913 |
+
CTCTAT
|
1914 |
+
CTCTCA
|
1915 |
+
CTCTCC
|
1916 |
+
CTCTCG
|
1917 |
+
CTCTCT
|
1918 |
+
CTCTGA
|
1919 |
+
CTCTGC
|
1920 |
+
CTCTGG
|
1921 |
+
CTCTGT
|
1922 |
+
CTCTTA
|
1923 |
+
CTCTTC
|
1924 |
+
CTCTTG
|
1925 |
+
CTCTTT
|
1926 |
+
CTGAAA
|
1927 |
+
CTGAAC
|
1928 |
+
CTGAAG
|
1929 |
+
CTGAAT
|
1930 |
+
CTGACA
|
1931 |
+
CTGACC
|
1932 |
+
CTGACG
|
1933 |
+
CTGACT
|
1934 |
+
CTGAGA
|
1935 |
+
CTGAGC
|
1936 |
+
CTGAGG
|
1937 |
+
CTGAGT
|
1938 |
+
CTGATA
|
1939 |
+
CTGATC
|
1940 |
+
CTGATG
|
1941 |
+
CTGATT
|
1942 |
+
CTGCAA
|
1943 |
+
CTGCAC
|
1944 |
+
CTGCAG
|
1945 |
+
CTGCAT
|
1946 |
+
CTGCCA
|
1947 |
+
CTGCCC
|
1948 |
+
CTGCCG
|
1949 |
+
CTGCCT
|
1950 |
+
CTGCGA
|
1951 |
+
CTGCGC
|
1952 |
+
CTGCGG
|
1953 |
+
CTGCGT
|
1954 |
+
CTGCTA
|
1955 |
+
CTGCTC
|
1956 |
+
CTGCTG
|
1957 |
+
CTGCTT
|
1958 |
+
CTGGAA
|
1959 |
+
CTGGAC
|
1960 |
+
CTGGAG
|
1961 |
+
CTGGAT
|
1962 |
+
CTGGCA
|
1963 |
+
CTGGCC
|
1964 |
+
CTGGCG
|
1965 |
+
CTGGCT
|
1966 |
+
CTGGGA
|
1967 |
+
CTGGGC
|
1968 |
+
CTGGGG
|
1969 |
+
CTGGGT
|
1970 |
+
CTGGTA
|
1971 |
+
CTGGTC
|
1972 |
+
CTGGTG
|
1973 |
+
CTGGTT
|
1974 |
+
CTGTAA
|
1975 |
+
CTGTAC
|
1976 |
+
CTGTAG
|
1977 |
+
CTGTAT
|
1978 |
+
CTGTCA
|
1979 |
+
CTGTCC
|
1980 |
+
CTGTCG
|
1981 |
+
CTGTCT
|
1982 |
+
CTGTGA
|
1983 |
+
CTGTGC
|
1984 |
+
CTGTGG
|
1985 |
+
CTGTGT
|
1986 |
+
CTGTTA
|
1987 |
+
CTGTTC
|
1988 |
+
CTGTTG
|
1989 |
+
CTGTTT
|
1990 |
+
CTTAAA
|
1991 |
+
CTTAAC
|
1992 |
+
CTTAAG
|
1993 |
+
CTTAAT
|
1994 |
+
CTTACA
|
1995 |
+
CTTACC
|
1996 |
+
CTTACG
|
1997 |
+
CTTACT
|
1998 |
+
CTTAGA
|
1999 |
+
CTTAGC
|
2000 |
+
CTTAGG
|
2001 |
+
CTTAGT
|
2002 |
+
CTTATA
|
2003 |
+
CTTATC
|
2004 |
+
CTTATG
|
2005 |
+
CTTATT
|
2006 |
+
CTTCAA
|
2007 |
+
CTTCAC
|
2008 |
+
CTTCAG
|
2009 |
+
CTTCAT
|
2010 |
+
CTTCCA
|
2011 |
+
CTTCCC
|
2012 |
+
CTTCCG
|
2013 |
+
CTTCCT
|
2014 |
+
CTTCGA
|
2015 |
+
CTTCGC
|
2016 |
+
CTTCGG
|
2017 |
+
CTTCGT
|
2018 |
+
CTTCTA
|
2019 |
+
CTTCTC
|
2020 |
+
CTTCTG
|
2021 |
+
CTTCTT
|
2022 |
+
CTTGAA
|
2023 |
+
CTTGAC
|
2024 |
+
CTTGAG
|
2025 |
+
CTTGAT
|
2026 |
+
CTTGCA
|
2027 |
+
CTTGCC
|
2028 |
+
CTTGCG
|
2029 |
+
CTTGCT
|
2030 |
+
CTTGGA
|
2031 |
+
CTTGGC
|
2032 |
+
CTTGGG
|
2033 |
+
CTTGGT
|
2034 |
+
CTTGTA
|
2035 |
+
CTTGTC
|
2036 |
+
CTTGTG
|
2037 |
+
CTTGTT
|
2038 |
+
CTTTAA
|
2039 |
+
CTTTAC
|
2040 |
+
CTTTAG
|
2041 |
+
CTTTAT
|
2042 |
+
CTTTCA
|
2043 |
+
CTTTCC
|
2044 |
+
CTTTCG
|
2045 |
+
CTTTCT
|
2046 |
+
CTTTGA
|
2047 |
+
CTTTGC
|
2048 |
+
CTTTGG
|
2049 |
+
CTTTGT
|
2050 |
+
CTTTTA
|
2051 |
+
CTTTTC
|
2052 |
+
CTTTTG
|
2053 |
+
CTTTTT
|
2054 |
+
GAAAAA
|
2055 |
+
GAAAAC
|
2056 |
+
GAAAAG
|
2057 |
+
GAAAAT
|
2058 |
+
GAAACA
|
2059 |
+
GAAACC
|
2060 |
+
GAAACG
|
2061 |
+
GAAACT
|
2062 |
+
GAAAGA
|
2063 |
+
GAAAGC
|
2064 |
+
GAAAGG
|
2065 |
+
GAAAGT
|
2066 |
+
GAAATA
|
2067 |
+
GAAATC
|
2068 |
+
GAAATG
|
2069 |
+
GAAATT
|
2070 |
+
GAACAA
|
2071 |
+
GAACAC
|
2072 |
+
GAACAG
|
2073 |
+
GAACAT
|
2074 |
+
GAACCA
|
2075 |
+
GAACCC
|
2076 |
+
GAACCG
|
2077 |
+
GAACCT
|
2078 |
+
GAACGA
|
2079 |
+
GAACGC
|
2080 |
+
GAACGG
|
2081 |
+
GAACGT
|
2082 |
+
GAACTA
|
2083 |
+
GAACTC
|
2084 |
+
GAACTG
|
2085 |
+
GAACTT
|
2086 |
+
GAAGAA
|
2087 |
+
GAAGAC
|
2088 |
+
GAAGAG
|
2089 |
+
GAAGAT
|
2090 |
+
GAAGCA
|
2091 |
+
GAAGCC
|
2092 |
+
GAAGCG
|
2093 |
+
GAAGCT
|
2094 |
+
GAAGGA
|
2095 |
+
GAAGGC
|
2096 |
+
GAAGGG
|
2097 |
+
GAAGGT
|
2098 |
+
GAAGTA
|
2099 |
+
GAAGTC
|
2100 |
+
GAAGTG
|
2101 |
+
GAAGTT
|
2102 |
+
GAATAA
|
2103 |
+
GAATAC
|
2104 |
+
GAATAG
|
2105 |
+
GAATAT
|
2106 |
+
GAATCA
|
2107 |
+
GAATCC
|
2108 |
+
GAATCG
|
2109 |
+
GAATCT
|
2110 |
+
GAATGA
|
2111 |
+
GAATGC
|
2112 |
+
GAATGG
|
2113 |
+
GAATGT
|
2114 |
+
GAATTA
|
2115 |
+
GAATTC
|
2116 |
+
GAATTG
|
2117 |
+
GAATTT
|
2118 |
+
GACAAA
|
2119 |
+
GACAAC
|
2120 |
+
GACAAG
|
2121 |
+
GACAAT
|
2122 |
+
GACACA
|
2123 |
+
GACACC
|
2124 |
+
GACACG
|
2125 |
+
GACACT
|
2126 |
+
GACAGA
|
2127 |
+
GACAGC
|
2128 |
+
GACAGG
|
2129 |
+
GACAGT
|
2130 |
+
GACATA
|
2131 |
+
GACATC
|
2132 |
+
GACATG
|
2133 |
+
GACATT
|
2134 |
+
GACCAA
|
2135 |
+
GACCAC
|
2136 |
+
GACCAG
|
2137 |
+
GACCAT
|
2138 |
+
GACCCA
|
2139 |
+
GACCCC
|
2140 |
+
GACCCG
|
2141 |
+
GACCCT
|
2142 |
+
GACCGA
|
2143 |
+
GACCGC
|
2144 |
+
GACCGG
|
2145 |
+
GACCGT
|
2146 |
+
GACCTA
|
2147 |
+
GACCTC
|
2148 |
+
GACCTG
|
2149 |
+
GACCTT
|
2150 |
+
GACGAA
|
2151 |
+
GACGAC
|
2152 |
+
GACGAG
|
2153 |
+
GACGAT
|
2154 |
+
GACGCA
|
2155 |
+
GACGCC
|
2156 |
+
GACGCG
|
2157 |
+
GACGCT
|
2158 |
+
GACGGA
|
2159 |
+
GACGGC
|
2160 |
+
GACGGG
|
2161 |
+
GACGGT
|
2162 |
+
GACGTA
|
2163 |
+
GACGTC
|
2164 |
+
GACGTG
|
2165 |
+
GACGTT
|
2166 |
+
GACTAA
|
2167 |
+
GACTAC
|
2168 |
+
GACTAG
|
2169 |
+
GACTAT
|
2170 |
+
GACTCA
|
2171 |
+
GACTCC
|
2172 |
+
GACTCG
|
2173 |
+
GACTCT
|
2174 |
+
GACTGA
|
2175 |
+
GACTGC
|
2176 |
+
GACTGG
|
2177 |
+
GACTGT
|
2178 |
+
GACTTA
|
2179 |
+
GACTTC
|
2180 |
+
GACTTG
|
2181 |
+
GACTTT
|
2182 |
+
GAGAAA
|
2183 |
+
GAGAAC
|
2184 |
+
GAGAAG
|
2185 |
+
GAGAAT
|
2186 |
+
GAGACA
|
2187 |
+
GAGACC
|
2188 |
+
GAGACG
|
2189 |
+
GAGACT
|
2190 |
+
GAGAGA
|
2191 |
+
GAGAGC
|
2192 |
+
GAGAGG
|
2193 |
+
GAGAGT
|
2194 |
+
GAGATA
|
2195 |
+
GAGATC
|
2196 |
+
GAGATG
|
2197 |
+
GAGATT
|
2198 |
+
GAGCAA
|
2199 |
+
GAGCAC
|
2200 |
+
GAGCAG
|
2201 |
+
GAGCAT
|
2202 |
+
GAGCCA
|
2203 |
+
GAGCCC
|
2204 |
+
GAGCCG
|
2205 |
+
GAGCCT
|
2206 |
+
GAGCGA
|
2207 |
+
GAGCGC
|
2208 |
+
GAGCGG
|
2209 |
+
GAGCGT
|
2210 |
+
GAGCTA
|
2211 |
+
GAGCTC
|
2212 |
+
GAGCTG
|
2213 |
+
GAGCTT
|
2214 |
+
GAGGAA
|
2215 |
+
GAGGAC
|
2216 |
+
GAGGAG
|
2217 |
+
GAGGAT
|
2218 |
+
GAGGCA
|
2219 |
+
GAGGCC
|
2220 |
+
GAGGCG
|
2221 |
+
GAGGCT
|
2222 |
+
GAGGGA
|
2223 |
+
GAGGGC
|
2224 |
+
GAGGGG
|
2225 |
+
GAGGGT
|
2226 |
+
GAGGTA
|
2227 |
+
GAGGTC
|
2228 |
+
GAGGTG
|
2229 |
+
GAGGTT
|
2230 |
+
GAGTAA
|
2231 |
+
GAGTAC
|
2232 |
+
GAGTAG
|
2233 |
+
GAGTAT
|
2234 |
+
GAGTCA
|
2235 |
+
GAGTCC
|
2236 |
+
GAGTCG
|
2237 |
+
GAGTCT
|
2238 |
+
GAGTGA
|
2239 |
+
GAGTGC
|
2240 |
+
GAGTGG
|
2241 |
+
GAGTGT
|
2242 |
+
GAGTTA
|
2243 |
+
GAGTTC
|
2244 |
+
GAGTTG
|
2245 |
+
GAGTTT
|
2246 |
+
GATAAA
|
2247 |
+
GATAAC
|
2248 |
+
GATAAG
|
2249 |
+
GATAAT
|
2250 |
+
GATACA
|
2251 |
+
GATACC
|
2252 |
+
GATACG
|
2253 |
+
GATACT
|
2254 |
+
GATAGA
|
2255 |
+
GATAGC
|
2256 |
+
GATAGG
|
2257 |
+
GATAGT
|
2258 |
+
GATATA
|
2259 |
+
GATATC
|
2260 |
+
GATATG
|
2261 |
+
GATATT
|
2262 |
+
GATCAA
|
2263 |
+
GATCAC
|
2264 |
+
GATCAG
|
2265 |
+
GATCAT
|
2266 |
+
GATCCA
|
2267 |
+
GATCCC
|
2268 |
+
GATCCG
|
2269 |
+
GATCCT
|
2270 |
+
GATCGA
|
2271 |
+
GATCGC
|
2272 |
+
GATCGG
|
2273 |
+
GATCGT
|
2274 |
+
GATCTA
|
2275 |
+
GATCTC
|
2276 |
+
GATCTG
|
2277 |
+
GATCTT
|
2278 |
+
GATGAA
|
2279 |
+
GATGAC
|
2280 |
+
GATGAG
|
2281 |
+
GATGAT
|
2282 |
+
GATGCA
|
2283 |
+
GATGCC
|
2284 |
+
GATGCG
|
2285 |
+
GATGCT
|
2286 |
+
GATGGA
|
2287 |
+
GATGGC
|
2288 |
+
GATGGG
|
2289 |
+
GATGGT
|
2290 |
+
GATGTA
|
2291 |
+
GATGTC
|
2292 |
+
GATGTG
|
2293 |
+
GATGTT
|
2294 |
+
GATTAA
|
2295 |
+
GATTAC
|
2296 |
+
GATTAG
|
2297 |
+
GATTAT
|
2298 |
+
GATTCA
|
2299 |
+
GATTCC
|
2300 |
+
GATTCG
|
2301 |
+
GATTCT
|
2302 |
+
GATTGA
|
2303 |
+
GATTGC
|
2304 |
+
GATTGG
|
2305 |
+
GATTGT
|
2306 |
+
GATTTA
|
2307 |
+
GATTTC
|
2308 |
+
GATTTG
|
2309 |
+
GATTTT
|
2310 |
+
GCAAAA
|
2311 |
+
GCAAAC
|
2312 |
+
GCAAAG
|
2313 |
+
GCAAAT
|
2314 |
+
GCAACA
|
2315 |
+
GCAACC
|
2316 |
+
GCAACG
|
2317 |
+
GCAACT
|
2318 |
+
GCAAGA
|
2319 |
+
GCAAGC
|
2320 |
+
GCAAGG
|
2321 |
+
GCAAGT
|
2322 |
+
GCAATA
|
2323 |
+
GCAATC
|
2324 |
+
GCAATG
|
2325 |
+
GCAATT
|
2326 |
+
GCACAA
|
2327 |
+
GCACAC
|
2328 |
+
GCACAG
|
2329 |
+
GCACAT
|
2330 |
+
GCACCA
|
2331 |
+
GCACCC
|
2332 |
+
GCACCG
|
2333 |
+
GCACCT
|
2334 |
+
GCACGA
|
2335 |
+
GCACGC
|
2336 |
+
GCACGG
|
2337 |
+
GCACGT
|
2338 |
+
GCACTA
|
2339 |
+
GCACTC
|
2340 |
+
GCACTG
|
2341 |
+
GCACTT
|
2342 |
+
GCAGAA
|
2343 |
+
GCAGAC
|
2344 |
+
GCAGAG
|
2345 |
+
GCAGAT
|
2346 |
+
GCAGCA
|
2347 |
+
GCAGCC
|
2348 |
+
GCAGCG
|
2349 |
+
GCAGCT
|
2350 |
+
GCAGGA
|
2351 |
+
GCAGGC
|
2352 |
+
GCAGGG
|
2353 |
+
GCAGGT
|
2354 |
+
GCAGTA
|
2355 |
+
GCAGTC
|
2356 |
+
GCAGTG
|
2357 |
+
GCAGTT
|
2358 |
+
GCATAA
|
2359 |
+
GCATAC
|
2360 |
+
GCATAG
|
2361 |
+
GCATAT
|
2362 |
+
GCATCA
|
2363 |
+
GCATCC
|
2364 |
+
GCATCG
|
2365 |
+
GCATCT
|
2366 |
+
GCATGA
|
2367 |
+
GCATGC
|
2368 |
+
GCATGG
|
2369 |
+
GCATGT
|
2370 |
+
GCATTA
|
2371 |
+
GCATTC
|
2372 |
+
GCATTG
|
2373 |
+
GCATTT
|
2374 |
+
GCCAAA
|
2375 |
+
GCCAAC
|
2376 |
+
GCCAAG
|
2377 |
+
GCCAAT
|
2378 |
+
GCCACA
|
2379 |
+
GCCACC
|
2380 |
+
GCCACG
|
2381 |
+
GCCACT
|
2382 |
+
GCCAGA
|
2383 |
+
GCCAGC
|
2384 |
+
GCCAGG
|
2385 |
+
GCCAGT
|
2386 |
+
GCCATA
|
2387 |
+
GCCATC
|
2388 |
+
GCCATG
|
2389 |
+
GCCATT
|
2390 |
+
GCCCAA
|
2391 |
+
GCCCAC
|
2392 |
+
GCCCAG
|
2393 |
+
GCCCAT
|
2394 |
+
GCCCCA
|
2395 |
+
GCCCCC
|
2396 |
+
GCCCCG
|
2397 |
+
GCCCCT
|
2398 |
+
GCCCGA
|
2399 |
+
GCCCGC
|
2400 |
+
GCCCGG
|
2401 |
+
GCCCGT
|
2402 |
+
GCCCTA
|
2403 |
+
GCCCTC
|
2404 |
+
GCCCTG
|
2405 |
+
GCCCTT
|
2406 |
+
GCCGAA
|
2407 |
+
GCCGAC
|
2408 |
+
GCCGAG
|
2409 |
+
GCCGAT
|
2410 |
+
GCCGCA
|
2411 |
+
GCCGCC
|
2412 |
+
GCCGCG
|
2413 |
+
GCCGCT
|
2414 |
+
GCCGGA
|
2415 |
+
GCCGGC
|
2416 |
+
GCCGGG
|
2417 |
+
GCCGGT
|
2418 |
+
GCCGTA
|
2419 |
+
GCCGTC
|
2420 |
+
GCCGTG
|
2421 |
+
GCCGTT
|
2422 |
+
GCCTAA
|
2423 |
+
GCCTAC
|
2424 |
+
GCCTAG
|
2425 |
+
GCCTAT
|
2426 |
+
GCCTCA
|
2427 |
+
GCCTCC
|
2428 |
+
GCCTCG
|
2429 |
+
GCCTCT
|
2430 |
+
GCCTGA
|
2431 |
+
GCCTGC
|
2432 |
+
GCCTGG
|
2433 |
+
GCCTGT
|
2434 |
+
GCCTTA
|
2435 |
+
GCCTTC
|
2436 |
+
GCCTTG
|
2437 |
+
GCCTTT
|
2438 |
+
GCGAAA
|
2439 |
+
GCGAAC
|
2440 |
+
GCGAAG
|
2441 |
+
GCGAAT
|
2442 |
+
GCGACA
|
2443 |
+
GCGACC
|
2444 |
+
GCGACG
|
2445 |
+
GCGACT
|
2446 |
+
GCGAGA
|
2447 |
+
GCGAGC
|
2448 |
+
GCGAGG
|
2449 |
+
GCGAGT
|
2450 |
+
GCGATA
|
2451 |
+
GCGATC
|
2452 |
+
GCGATG
|
2453 |
+
GCGATT
|
2454 |
+
GCGCAA
|
2455 |
+
GCGCAC
|
2456 |
+
GCGCAG
|
2457 |
+
GCGCAT
|
2458 |
+
GCGCCA
|
2459 |
+
GCGCCC
|
2460 |
+
GCGCCG
|
2461 |
+
GCGCCT
|
2462 |
+
GCGCGA
|
2463 |
+
GCGCGC
|
2464 |
+
GCGCGG
|
2465 |
+
GCGCGT
|
2466 |
+
GCGCTA
|
2467 |
+
GCGCTC
|
2468 |
+
GCGCTG
|
2469 |
+
GCGCTT
|
2470 |
+
GCGGAA
|
2471 |
+
GCGGAC
|
2472 |
+
GCGGAG
|
2473 |
+
GCGGAT
|
2474 |
+
GCGGCA
|
2475 |
+
GCGGCC
|
2476 |
+
GCGGCG
|
2477 |
+
GCGGCT
|
2478 |
+
GCGGGA
|
2479 |
+
GCGGGC
|
2480 |
+
GCGGGG
|
2481 |
+
GCGGGT
|
2482 |
+
GCGGTA
|
2483 |
+
GCGGTC
|
2484 |
+
GCGGTG
|
2485 |
+
GCGGTT
|
2486 |
+
GCGTAA
|
2487 |
+
GCGTAC
|
2488 |
+
GCGTAG
|
2489 |
+
GCGTAT
|
2490 |
+
GCGTCA
|
2491 |
+
GCGTCC
|
2492 |
+
GCGTCG
|
2493 |
+
GCGTCT
|
2494 |
+
GCGTGA
|
2495 |
+
GCGTGC
|
2496 |
+
GCGTGG
|
2497 |
+
GCGTGT
|
2498 |
+
GCGTTA
|
2499 |
+
GCGTTC
|
2500 |
+
GCGTTG
|
2501 |
+
GCGTTT
|
2502 |
+
GCTAAA
|
2503 |
+
GCTAAC
|
2504 |
+
GCTAAG
|
2505 |
+
GCTAAT
|
2506 |
+
GCTACA
|
2507 |
+
GCTACC
|
2508 |
+
GCTACG
|
2509 |
+
GCTACT
|
2510 |
+
GCTAGA
|
2511 |
+
GCTAGC
|
2512 |
+
GCTAGG
|
2513 |
+
GCTAGT
|
2514 |
+
GCTATA
|
2515 |
+
GCTATC
|
2516 |
+
GCTATG
|
2517 |
+
GCTATT
|
2518 |
+
GCTCAA
|
2519 |
+
GCTCAC
|
2520 |
+
GCTCAG
|
2521 |
+
GCTCAT
|
2522 |
+
GCTCCA
|
2523 |
+
GCTCCC
|
2524 |
+
GCTCCG
|
2525 |
+
GCTCCT
|
2526 |
+
GCTCGA
|
2527 |
+
GCTCGC
|
2528 |
+
GCTCGG
|
2529 |
+
GCTCGT
|
2530 |
+
GCTCTA
|
2531 |
+
GCTCTC
|
2532 |
+
GCTCTG
|
2533 |
+
GCTCTT
|
2534 |
+
GCTGAA
|
2535 |
+
GCTGAC
|
2536 |
+
GCTGAG
|
2537 |
+
GCTGAT
|
2538 |
+
GCTGCA
|
2539 |
+
GCTGCC
|
2540 |
+
GCTGCG
|
2541 |
+
GCTGCT
|
2542 |
+
GCTGGA
|
2543 |
+
GCTGGC
|
2544 |
+
GCTGGG
|
2545 |
+
GCTGGT
|
2546 |
+
GCTGTA
|
2547 |
+
GCTGTC
|
2548 |
+
GCTGTG
|
2549 |
+
GCTGTT
|
2550 |
+
GCTTAA
|
2551 |
+
GCTTAC
|
2552 |
+
GCTTAG
|
2553 |
+
GCTTAT
|
2554 |
+
GCTTCA
|
2555 |
+
GCTTCC
|
2556 |
+
GCTTCG
|
2557 |
+
GCTTCT
|
2558 |
+
GCTTGA
|
2559 |
+
GCTTGC
|
2560 |
+
GCTTGG
|
2561 |
+
GCTTGT
|
2562 |
+
GCTTTA
|
2563 |
+
GCTTTC
|
2564 |
+
GCTTTG
|
2565 |
+
GCTTTT
|
2566 |
+
GGAAAA
|
2567 |
+
GGAAAC
|
2568 |
+
GGAAAG
|
2569 |
+
GGAAAT
|
2570 |
+
GGAACA
|
2571 |
+
GGAACC
|
2572 |
+
GGAACG
|
2573 |
+
GGAACT
|
2574 |
+
GGAAGA
|
2575 |
+
GGAAGC
|
2576 |
+
GGAAGG
|
2577 |
+
GGAAGT
|
2578 |
+
GGAATA
|
2579 |
+
GGAATC
|
2580 |
+
GGAATG
|
2581 |
+
GGAATT
|
2582 |
+
GGACAA
|
2583 |
+
GGACAC
|
2584 |
+
GGACAG
|
2585 |
+
GGACAT
|
2586 |
+
GGACCA
|
2587 |
+
GGACCC
|
2588 |
+
GGACCG
|
2589 |
+
GGACCT
|
2590 |
+
GGACGA
|
2591 |
+
GGACGC
|
2592 |
+
GGACGG
|
2593 |
+
GGACGT
|
2594 |
+
GGACTA
|
2595 |
+
GGACTC
|
2596 |
+
GGACTG
|
2597 |
+
GGACTT
|
2598 |
+
GGAGAA
|
2599 |
+
GGAGAC
|
2600 |
+
GGAGAG
|
2601 |
+
GGAGAT
|
2602 |
+
GGAGCA
|
2603 |
+
GGAGCC
|
2604 |
+
GGAGCG
|
2605 |
+
GGAGCT
|
2606 |
+
GGAGGA
|
2607 |
+
GGAGGC
|
2608 |
+
GGAGGG
|
2609 |
+
GGAGGT
|
2610 |
+
GGAGTA
|
2611 |
+
GGAGTC
|
2612 |
+
GGAGTG
|
2613 |
+
GGAGTT
|
2614 |
+
GGATAA
|
2615 |
+
GGATAC
|
2616 |
+
GGATAG
|
2617 |
+
GGATAT
|
2618 |
+
GGATCA
|
2619 |
+
GGATCC
|
2620 |
+
GGATCG
|
2621 |
+
GGATCT
|
2622 |
+
GGATGA
|
2623 |
+
GGATGC
|
2624 |
+
GGATGG
|
2625 |
+
GGATGT
|
2626 |
+
GGATTA
|
2627 |
+
GGATTC
|
2628 |
+
GGATTG
|
2629 |
+
GGATTT
|
2630 |
+
GGCAAA
|
2631 |
+
GGCAAC
|
2632 |
+
GGCAAG
|
2633 |
+
GGCAAT
|
2634 |
+
GGCACA
|
2635 |
+
GGCACC
|
2636 |
+
GGCACG
|
2637 |
+
GGCACT
|
2638 |
+
GGCAGA
|
2639 |
+
GGCAGC
|
2640 |
+
GGCAGG
|
2641 |
+
GGCAGT
|
2642 |
+
GGCATA
|
2643 |
+
GGCATC
|
2644 |
+
GGCATG
|
2645 |
+
GGCATT
|
2646 |
+
GGCCAA
|
2647 |
+
GGCCAC
|
2648 |
+
GGCCAG
|
2649 |
+
GGCCAT
|
2650 |
+
GGCCCA
|
2651 |
+
GGCCCC
|
2652 |
+
GGCCCG
|
2653 |
+
GGCCCT
|
2654 |
+
GGCCGA
|
2655 |
+
GGCCGC
|
2656 |
+
GGCCGG
|
2657 |
+
GGCCGT
|
2658 |
+
GGCCTA
|
2659 |
+
GGCCTC
|
2660 |
+
GGCCTG
|
2661 |
+
GGCCTT
|
2662 |
+
GGCGAA
|
2663 |
+
GGCGAC
|
2664 |
+
GGCGAG
|
2665 |
+
GGCGAT
|
2666 |
+
GGCGCA
|
2667 |
+
GGCGCC
|
2668 |
+
GGCGCG
|
2669 |
+
GGCGCT
|
2670 |
+
GGCGGA
|
2671 |
+
GGCGGC
|
2672 |
+
GGCGGG
|
2673 |
+
GGCGGT
|
2674 |
+
GGCGTA
|
2675 |
+
GGCGTC
|
2676 |
+
GGCGTG
|
2677 |
+
GGCGTT
|
2678 |
+
GGCTAA
|
2679 |
+
GGCTAC
|
2680 |
+
GGCTAG
|
2681 |
+
GGCTAT
|
2682 |
+
GGCTCA
|
2683 |
+
GGCTCC
|
2684 |
+
GGCTCG
|
2685 |
+
GGCTCT
|
2686 |
+
GGCTGA
|
2687 |
+
GGCTGC
|
2688 |
+
GGCTGG
|
2689 |
+
GGCTGT
|
2690 |
+
GGCTTA
|
2691 |
+
GGCTTC
|
2692 |
+
GGCTTG
|
2693 |
+
GGCTTT
|
2694 |
+
GGGAAA
|
2695 |
+
GGGAAC
|
2696 |
+
GGGAAG
|
2697 |
+
GGGAAT
|
2698 |
+
GGGACA
|
2699 |
+
GGGACC
|
2700 |
+
GGGACG
|
2701 |
+
GGGACT
|
2702 |
+
GGGAGA
|
2703 |
+
GGGAGC
|
2704 |
+
GGGAGG
|
2705 |
+
GGGAGT
|
2706 |
+
GGGATA
|
2707 |
+
GGGATC
|
2708 |
+
GGGATG
|
2709 |
+
GGGATT
|
2710 |
+
GGGCAA
|
2711 |
+
GGGCAC
|
2712 |
+
GGGCAG
|
2713 |
+
GGGCAT
|
2714 |
+
GGGCCA
|
2715 |
+
GGGCCC
|
2716 |
+
GGGCCG
|
2717 |
+
GGGCCT
|
2718 |
+
GGGCGA
|
2719 |
+
GGGCGC
|
2720 |
+
GGGCGG
|
2721 |
+
GGGCGT
|
2722 |
+
GGGCTA
|
2723 |
+
GGGCTC
|
2724 |
+
GGGCTG
|
2725 |
+
GGGCTT
|
2726 |
+
GGGGAA
|
2727 |
+
GGGGAC
|
2728 |
+
GGGGAG
|
2729 |
+
GGGGAT
|
2730 |
+
GGGGCA
|
2731 |
+
GGGGCC
|
2732 |
+
GGGGCG
|
2733 |
+
GGGGCT
|
2734 |
+
GGGGGA
|
2735 |
+
GGGGGC
|
2736 |
+
GGGGGG
|
2737 |
+
GGGGGT
|
2738 |
+
GGGGTA
|
2739 |
+
GGGGTC
|
2740 |
+
GGGGTG
|
2741 |
+
GGGGTT
|
2742 |
+
GGGTAA
|
2743 |
+
GGGTAC
|
2744 |
+
GGGTAG
|
2745 |
+
GGGTAT
|
2746 |
+
GGGTCA
|
2747 |
+
GGGTCC
|
2748 |
+
GGGTCG
|
2749 |
+
GGGTCT
|
2750 |
+
GGGTGA
|
2751 |
+
GGGTGC
|
2752 |
+
GGGTGG
|
2753 |
+
GGGTGT
|
2754 |
+
GGGTTA
|
2755 |
+
GGGTTC
|
2756 |
+
GGGTTG
|
2757 |
+
GGGTTT
|
2758 |
+
GGTAAA
|
2759 |
+
GGTAAC
|
2760 |
+
GGTAAG
|
2761 |
+
GGTAAT
|
2762 |
+
GGTACA
|
2763 |
+
GGTACC
|
2764 |
+
GGTACG
|
2765 |
+
GGTACT
|
2766 |
+
GGTAGA
|
2767 |
+
GGTAGC
|
2768 |
+
GGTAGG
|
2769 |
+
GGTAGT
|
2770 |
+
GGTATA
|
2771 |
+
GGTATC
|
2772 |
+
GGTATG
|
2773 |
+
GGTATT
|
2774 |
+
GGTCAA
|
2775 |
+
GGTCAC
|
2776 |
+
GGTCAG
|
2777 |
+
GGTCAT
|
2778 |
+
GGTCCA
|
2779 |
+
GGTCCC
|
2780 |
+
GGTCCG
|
2781 |
+
GGTCCT
|
2782 |
+
GGTCGA
|
2783 |
+
GGTCGC
|
2784 |
+
GGTCGG
|
2785 |
+
GGTCGT
|
2786 |
+
GGTCTA
|
2787 |
+
GGTCTC
|
2788 |
+
GGTCTG
|
2789 |
+
GGTCTT
|
2790 |
+
GGTGAA
|
2791 |
+
GGTGAC
|
2792 |
+
GGTGAG
|
2793 |
+
GGTGAT
|
2794 |
+
GGTGCA
|
2795 |
+
GGTGCC
|
2796 |
+
GGTGCG
|
2797 |
+
GGTGCT
|
2798 |
+
GGTGGA
|
2799 |
+
GGTGGC
|
2800 |
+
GGTGGG
|
2801 |
+
GGTGGT
|
2802 |
+
GGTGTA
|
2803 |
+
GGTGTC
|
2804 |
+
GGTGTG
|
2805 |
+
GGTGTT
|
2806 |
+
GGTTAA
|
2807 |
+
GGTTAC
|
2808 |
+
GGTTAG
|
2809 |
+
GGTTAT
|
2810 |
+
GGTTCA
|
2811 |
+
GGTTCC
|
2812 |
+
GGTTCG
|
2813 |
+
GGTTCT
|
2814 |
+
GGTTGA
|
2815 |
+
GGTTGC
|
2816 |
+
GGTTGG
|
2817 |
+
GGTTGT
|
2818 |
+
GGTTTA
|
2819 |
+
GGTTTC
|
2820 |
+
GGTTTG
|
2821 |
+
GGTTTT
|
2822 |
+
GTAAAA
|
2823 |
+
GTAAAC
|
2824 |
+
GTAAAG
|
2825 |
+
GTAAAT
|
2826 |
+
GTAACA
|
2827 |
+
GTAACC
|
2828 |
+
GTAACG
|
2829 |
+
GTAACT
|
2830 |
+
GTAAGA
|
2831 |
+
GTAAGC
|
2832 |
+
GTAAGG
|
2833 |
+
GTAAGT
|
2834 |
+
GTAATA
|
2835 |
+
GTAATC
|
2836 |
+
GTAATG
|
2837 |
+
GTAATT
|
2838 |
+
GTACAA
|
2839 |
+
GTACAC
|
2840 |
+
GTACAG
|
2841 |
+
GTACAT
|
2842 |
+
GTACCA
|
2843 |
+
GTACCC
|
2844 |
+
GTACCG
|
2845 |
+
GTACCT
|
2846 |
+
GTACGA
|
2847 |
+
GTACGC
|
2848 |
+
GTACGG
|
2849 |
+
GTACGT
|
2850 |
+
GTACTA
|
2851 |
+
GTACTC
|
2852 |
+
GTACTG
|
2853 |
+
GTACTT
|
2854 |
+
GTAGAA
|
2855 |
+
GTAGAC
|
2856 |
+
GTAGAG
|
2857 |
+
GTAGAT
|
2858 |
+
GTAGCA
|
2859 |
+
GTAGCC
|
2860 |
+
GTAGCG
|
2861 |
+
GTAGCT
|
2862 |
+
GTAGGA
|
2863 |
+
GTAGGC
|
2864 |
+
GTAGGG
|
2865 |
+
GTAGGT
|
2866 |
+
GTAGTA
|
2867 |
+
GTAGTC
|
2868 |
+
GTAGTG
|
2869 |
+
GTAGTT
|
2870 |
+
GTATAA
|
2871 |
+
GTATAC
|
2872 |
+
GTATAG
|
2873 |
+
GTATAT
|
2874 |
+
GTATCA
|
2875 |
+
GTATCC
|
2876 |
+
GTATCG
|
2877 |
+
GTATCT
|
2878 |
+
GTATGA
|
2879 |
+
GTATGC
|
2880 |
+
GTATGG
|
2881 |
+
GTATGT
|
2882 |
+
GTATTA
|
2883 |
+
GTATTC
|
2884 |
+
GTATTG
|
2885 |
+
GTATTT
|
2886 |
+
GTCAAA
|
2887 |
+
GTCAAC
|
2888 |
+
GTCAAG
|
2889 |
+
GTCAAT
|
2890 |
+
GTCACA
|
2891 |
+
GTCACC
|
2892 |
+
GTCACG
|
2893 |
+
GTCACT
|
2894 |
+
GTCAGA
|
2895 |
+
GTCAGC
|
2896 |
+
GTCAGG
|
2897 |
+
GTCAGT
|
2898 |
+
GTCATA
|
2899 |
+
GTCATC
|
2900 |
+
GTCATG
|
2901 |
+
GTCATT
|
2902 |
+
GTCCAA
|
2903 |
+
GTCCAC
|
2904 |
+
GTCCAG
|
2905 |
+
GTCCAT
|
2906 |
+
GTCCCA
|
2907 |
+
GTCCCC
|
2908 |
+
GTCCCG
|
2909 |
+
GTCCCT
|
2910 |
+
GTCCGA
|
2911 |
+
GTCCGC
|
2912 |
+
GTCCGG
|
2913 |
+
GTCCGT
|
2914 |
+
GTCCTA
|
2915 |
+
GTCCTC
|
2916 |
+
GTCCTG
|
2917 |
+
GTCCTT
|
2918 |
+
GTCGAA
|
2919 |
+
GTCGAC
|
2920 |
+
GTCGAG
|
2921 |
+
GTCGAT
|
2922 |
+
GTCGCA
|
2923 |
+
GTCGCC
|
2924 |
+
GTCGCG
|
2925 |
+
GTCGCT
|
2926 |
+
GTCGGA
|
2927 |
+
GTCGGC
|
2928 |
+
GTCGGG
|
2929 |
+
GTCGGT
|
2930 |
+
GTCGTA
|
2931 |
+
GTCGTC
|
2932 |
+
GTCGTG
|
2933 |
+
GTCGTT
|
2934 |
+
GTCTAA
|
2935 |
+
GTCTAC
|
2936 |
+
GTCTAG
|
2937 |
+
GTCTAT
|
2938 |
+
GTCTCA
|
2939 |
+
GTCTCC
|
2940 |
+
GTCTCG
|
2941 |
+
GTCTCT
|
2942 |
+
GTCTGA
|
2943 |
+
GTCTGC
|
2944 |
+
GTCTGG
|
2945 |
+
GTCTGT
|
2946 |
+
GTCTTA
|
2947 |
+
GTCTTC
|
2948 |
+
GTCTTG
|
2949 |
+
GTCTTT
|
2950 |
+
GTGAAA
|
2951 |
+
GTGAAC
|
2952 |
+
GTGAAG
|
2953 |
+
GTGAAT
|
2954 |
+
GTGACA
|
2955 |
+
GTGACC
|
2956 |
+
GTGACG
|
2957 |
+
GTGACT
|
2958 |
+
GTGAGA
|
2959 |
+
GTGAGC
|
2960 |
+
GTGAGG
|
2961 |
+
GTGAGT
|
2962 |
+
GTGATA
|
2963 |
+
GTGATC
|
2964 |
+
GTGATG
|
2965 |
+
GTGATT
|
2966 |
+
GTGCAA
|
2967 |
+
GTGCAC
|
2968 |
+
GTGCAG
|
2969 |
+
GTGCAT
|
2970 |
+
GTGCCA
|
2971 |
+
GTGCCC
|
2972 |
+
GTGCCG
|
2973 |
+
GTGCCT
|
2974 |
+
GTGCGA
|
2975 |
+
GTGCGC
|
2976 |
+
GTGCGG
|
2977 |
+
GTGCGT
|
2978 |
+
GTGCTA
|
2979 |
+
GTGCTC
|
2980 |
+
GTGCTG
|
2981 |
+
GTGCTT
|
2982 |
+
GTGGAA
|
2983 |
+
GTGGAC
|
2984 |
+
GTGGAG
|
2985 |
+
GTGGAT
|
2986 |
+
GTGGCA
|
2987 |
+
GTGGCC
|
2988 |
+
GTGGCG
|
2989 |
+
GTGGCT
|
2990 |
+
GTGGGA
|
2991 |
+
GTGGGC
|
2992 |
+
GTGGGG
|
2993 |
+
GTGGGT
|
2994 |
+
GTGGTA
|
2995 |
+
GTGGTC
|
2996 |
+
GTGGTG
|
2997 |
+
GTGGTT
|
2998 |
+
GTGTAA
|
2999 |
+
GTGTAC
|
3000 |
+
GTGTAG
|
3001 |
+
GTGTAT
|
3002 |
+
GTGTCA
|
3003 |
+
GTGTCC
|
3004 |
+
GTGTCG
|
3005 |
+
GTGTCT
|
3006 |
+
GTGTGA
|
3007 |
+
GTGTGC
|
3008 |
+
GTGTGG
|
3009 |
+
GTGTGT
|
3010 |
+
GTGTTA
|
3011 |
+
GTGTTC
|
3012 |
+
GTGTTG
|
3013 |
+
GTGTTT
|
3014 |
+
GTTAAA
|
3015 |
+
GTTAAC
|
3016 |
+
GTTAAG
|
3017 |
+
GTTAAT
|
3018 |
+
GTTACA
|
3019 |
+
GTTACC
|
3020 |
+
GTTACG
|
3021 |
+
GTTACT
|
3022 |
+
GTTAGA
|
3023 |
+
GTTAGC
|
3024 |
+
GTTAGG
|
3025 |
+
GTTAGT
|
3026 |
+
GTTATA
|
3027 |
+
GTTATC
|
3028 |
+
GTTATG
|
3029 |
+
GTTATT
|
3030 |
+
GTTCAA
|
3031 |
+
GTTCAC
|
3032 |
+
GTTCAG
|
3033 |
+
GTTCAT
|
3034 |
+
GTTCCA
|
3035 |
+
GTTCCC
|
3036 |
+
GTTCCG
|
3037 |
+
GTTCCT
|
3038 |
+
GTTCGA
|
3039 |
+
GTTCGC
|
3040 |
+
GTTCGG
|
3041 |
+
GTTCGT
|
3042 |
+
GTTCTA
|
3043 |
+
GTTCTC
|
3044 |
+
GTTCTG
|
3045 |
+
GTTCTT
|
3046 |
+
GTTGAA
|
3047 |
+
GTTGAC
|
3048 |
+
GTTGAG
|
3049 |
+
GTTGAT
|
3050 |
+
GTTGCA
|
3051 |
+
GTTGCC
|
3052 |
+
GTTGCG
|
3053 |
+
GTTGCT
|
3054 |
+
GTTGGA
|
3055 |
+
GTTGGC
|
3056 |
+
GTTGGG
|
3057 |
+
GTTGGT
|
3058 |
+
GTTGTA
|
3059 |
+
GTTGTC
|
3060 |
+
GTTGTG
|
3061 |
+
GTTGTT
|
3062 |
+
GTTTAA
|
3063 |
+
GTTTAC
|
3064 |
+
GTTTAG
|
3065 |
+
GTTTAT
|
3066 |
+
GTTTCA
|
3067 |
+
GTTTCC
|
3068 |
+
GTTTCG
|
3069 |
+
GTTTCT
|
3070 |
+
GTTTGA
|
3071 |
+
GTTTGC
|
3072 |
+
GTTTGG
|
3073 |
+
GTTTGT
|
3074 |
+
GTTTTA
|
3075 |
+
GTTTTC
|
3076 |
+
GTTTTG
|
3077 |
+
GTTTTT
|
3078 |
+
TAAAAA
|
3079 |
+
TAAAAC
|
3080 |
+
TAAAAG
|
3081 |
+
TAAAAT
|
3082 |
+
TAAACA
|
3083 |
+
TAAACC
|
3084 |
+
TAAACG
|
3085 |
+
TAAACT
|
3086 |
+
TAAAGA
|
3087 |
+
TAAAGC
|
3088 |
+
TAAAGG
|
3089 |
+
TAAAGT
|
3090 |
+
TAAATA
|
3091 |
+
TAAATC
|
3092 |
+
TAAATG
|
3093 |
+
TAAATT
|
3094 |
+
TAACAA
|
3095 |
+
TAACAC
|
3096 |
+
TAACAG
|
3097 |
+
TAACAT
|
3098 |
+
TAACCA
|
3099 |
+
TAACCC
|
3100 |
+
TAACCG
|
3101 |
+
TAACCT
|
3102 |
+
TAACGA
|
3103 |
+
TAACGC
|
3104 |
+
TAACGG
|
3105 |
+
TAACGT
|
3106 |
+
TAACTA
|
3107 |
+
TAACTC
|
3108 |
+
TAACTG
|
3109 |
+
TAACTT
|
3110 |
+
TAAGAA
|
3111 |
+
TAAGAC
|
3112 |
+
TAAGAG
|
3113 |
+
TAAGAT
|
3114 |
+
TAAGCA
|
3115 |
+
TAAGCC
|
3116 |
+
TAAGCG
|
3117 |
+
TAAGCT
|
3118 |
+
TAAGGA
|
3119 |
+
TAAGGC
|
3120 |
+
TAAGGG
|
3121 |
+
TAAGGT
|
3122 |
+
TAAGTA
|
3123 |
+
TAAGTC
|
3124 |
+
TAAGTG
|
3125 |
+
TAAGTT
|
3126 |
+
TAATAA
|
3127 |
+
TAATAC
|
3128 |
+
TAATAG
|
3129 |
+
TAATAT
|
3130 |
+
TAATCA
|
3131 |
+
TAATCC
|
3132 |
+
TAATCG
|
3133 |
+
TAATCT
|
3134 |
+
TAATGA
|
3135 |
+
TAATGC
|
3136 |
+
TAATGG
|
3137 |
+
TAATGT
|
3138 |
+
TAATTA
|
3139 |
+
TAATTC
|
3140 |
+
TAATTG
|
3141 |
+
TAATTT
|
3142 |
+
TACAAA
|
3143 |
+
TACAAC
|
3144 |
+
TACAAG
|
3145 |
+
TACAAT
|
3146 |
+
TACACA
|
3147 |
+
TACACC
|
3148 |
+
TACACG
|
3149 |
+
TACACT
|
3150 |
+
TACAGA
|
3151 |
+
TACAGC
|
3152 |
+
TACAGG
|
3153 |
+
TACAGT
|
3154 |
+
TACATA
|
3155 |
+
TACATC
|
3156 |
+
TACATG
|
3157 |
+
TACATT
|
3158 |
+
TACCAA
|
3159 |
+
TACCAC
|
3160 |
+
TACCAG
|
3161 |
+
TACCAT
|
3162 |
+
TACCCA
|
3163 |
+
TACCCC
|
3164 |
+
TACCCG
|
3165 |
+
TACCCT
|
3166 |
+
TACCGA
|
3167 |
+
TACCGC
|
3168 |
+
TACCGG
|
3169 |
+
TACCGT
|
3170 |
+
TACCTA
|
3171 |
+
TACCTC
|
3172 |
+
TACCTG
|
3173 |
+
TACCTT
|
3174 |
+
TACGAA
|
3175 |
+
TACGAC
|
3176 |
+
TACGAG
|
3177 |
+
TACGAT
|
3178 |
+
TACGCA
|
3179 |
+
TACGCC
|
3180 |
+
TACGCG
|
3181 |
+
TACGCT
|
3182 |
+
TACGGA
|
3183 |
+
TACGGC
|
3184 |
+
TACGGG
|
3185 |
+
TACGGT
|
3186 |
+
TACGTA
|
3187 |
+
TACGTC
|
3188 |
+
TACGTG
|
3189 |
+
TACGTT
|
3190 |
+
TACTAA
|
3191 |
+
TACTAC
|
3192 |
+
TACTAG
|
3193 |
+
TACTAT
|
3194 |
+
TACTCA
|
3195 |
+
TACTCC
|
3196 |
+
TACTCG
|
3197 |
+
TACTCT
|
3198 |
+
TACTGA
|
3199 |
+
TACTGC
|
3200 |
+
TACTGG
|
3201 |
+
TACTGT
|
3202 |
+
TACTTA
|
3203 |
+
TACTTC
|
3204 |
+
TACTTG
|
3205 |
+
TACTTT
|
3206 |
+
TAGAAA
|
3207 |
+
TAGAAC
|
3208 |
+
TAGAAG
|
3209 |
+
TAGAAT
|
3210 |
+
TAGACA
|
3211 |
+
TAGACC
|
3212 |
+
TAGACG
|
3213 |
+
TAGACT
|
3214 |
+
TAGAGA
|
3215 |
+
TAGAGC
|
3216 |
+
TAGAGG
|
3217 |
+
TAGAGT
|
3218 |
+
TAGATA
|
3219 |
+
TAGATC
|
3220 |
+
TAGATG
|
3221 |
+
TAGATT
|
3222 |
+
TAGCAA
|
3223 |
+
TAGCAC
|
3224 |
+
TAGCAG
|
3225 |
+
TAGCAT
|
3226 |
+
TAGCCA
|
3227 |
+
TAGCCC
|
3228 |
+
TAGCCG
|
3229 |
+
TAGCCT
|
3230 |
+
TAGCGA
|
3231 |
+
TAGCGC
|
3232 |
+
TAGCGG
|
3233 |
+
TAGCGT
|
3234 |
+
TAGCTA
|
3235 |
+
TAGCTC
|
3236 |
+
TAGCTG
|
3237 |
+
TAGCTT
|
3238 |
+
TAGGAA
|
3239 |
+
TAGGAC
|
3240 |
+
TAGGAG
|
3241 |
+
TAGGAT
|
3242 |
+
TAGGCA
|
3243 |
+
TAGGCC
|
3244 |
+
TAGGCG
|
3245 |
+
TAGGCT
|
3246 |
+
TAGGGA
|
3247 |
+
TAGGGC
|
3248 |
+
TAGGGG
|
3249 |
+
TAGGGT
|
3250 |
+
TAGGTA
|
3251 |
+
TAGGTC
|
3252 |
+
TAGGTG
|
3253 |
+
TAGGTT
|
3254 |
+
TAGTAA
|
3255 |
+
TAGTAC
|
3256 |
+
TAGTAG
|
3257 |
+
TAGTAT
|
3258 |
+
TAGTCA
|
3259 |
+
TAGTCC
|
3260 |
+
TAGTCG
|
3261 |
+
TAGTCT
|
3262 |
+
TAGTGA
|
3263 |
+
TAGTGC
|
3264 |
+
TAGTGG
|
3265 |
+
TAGTGT
|
3266 |
+
TAGTTA
|
3267 |
+
TAGTTC
|
3268 |
+
TAGTTG
|
3269 |
+
TAGTTT
|
3270 |
+
TATAAA
|
3271 |
+
TATAAC
|
3272 |
+
TATAAG
|
3273 |
+
TATAAT
|
3274 |
+
TATACA
|
3275 |
+
TATACC
|
3276 |
+
TATACG
|
3277 |
+
TATACT
|
3278 |
+
TATAGA
|
3279 |
+
TATAGC
|
3280 |
+
TATAGG
|
3281 |
+
TATAGT
|
3282 |
+
TATATA
|
3283 |
+
TATATC
|
3284 |
+
TATATG
|
3285 |
+
TATATT
|
3286 |
+
TATCAA
|
3287 |
+
TATCAC
|
3288 |
+
TATCAG
|
3289 |
+
TATCAT
|
3290 |
+
TATCCA
|
3291 |
+
TATCCC
|
3292 |
+
TATCCG
|
3293 |
+
TATCCT
|
3294 |
+
TATCGA
|
3295 |
+
TATCGC
|
3296 |
+
TATCGG
|
3297 |
+
TATCGT
|
3298 |
+
TATCTA
|
3299 |
+
TATCTC
|
3300 |
+
TATCTG
|
3301 |
+
TATCTT
|
3302 |
+
TATGAA
|
3303 |
+
TATGAC
|
3304 |
+
TATGAG
|
3305 |
+
TATGAT
|
3306 |
+
TATGCA
|
3307 |
+
TATGCC
|
3308 |
+
TATGCG
|
3309 |
+
TATGCT
|
3310 |
+
TATGGA
|
3311 |
+
TATGGC
|
3312 |
+
TATGGG
|
3313 |
+
TATGGT
|
3314 |
+
TATGTA
|
3315 |
+
TATGTC
|
3316 |
+
TATGTG
|
3317 |
+
TATGTT
|
3318 |
+
TATTAA
|
3319 |
+
TATTAC
|
3320 |
+
TATTAG
|
3321 |
+
TATTAT
|
3322 |
+
TATTCA
|
3323 |
+
TATTCC
|
3324 |
+
TATTCG
|
3325 |
+
TATTCT
|
3326 |
+
TATTGA
|
3327 |
+
TATTGC
|
3328 |
+
TATTGG
|
3329 |
+
TATTGT
|
3330 |
+
TATTTA
|
3331 |
+
TATTTC
|
3332 |
+
TATTTG
|
3333 |
+
TATTTT
|
3334 |
+
TCAAAA
|
3335 |
+
TCAAAC
|
3336 |
+
TCAAAG
|
3337 |
+
TCAAAT
|
3338 |
+
TCAACA
|
3339 |
+
TCAACC
|
3340 |
+
TCAACG
|
3341 |
+
TCAACT
|
3342 |
+
TCAAGA
|
3343 |
+
TCAAGC
|
3344 |
+
TCAAGG
|
3345 |
+
TCAAGT
|
3346 |
+
TCAATA
|
3347 |
+
TCAATC
|
3348 |
+
TCAATG
|
3349 |
+
TCAATT
|
3350 |
+
TCACAA
|
3351 |
+
TCACAC
|
3352 |
+
TCACAG
|
3353 |
+
TCACAT
|
3354 |
+
TCACCA
|
3355 |
+
TCACCC
|
3356 |
+
TCACCG
|
3357 |
+
TCACCT
|
3358 |
+
TCACGA
|
3359 |
+
TCACGC
|
3360 |
+
TCACGG
|
3361 |
+
TCACGT
|
3362 |
+
TCACTA
|
3363 |
+
TCACTC
|
3364 |
+
TCACTG
|
3365 |
+
TCACTT
|
3366 |
+
TCAGAA
|
3367 |
+
TCAGAC
|
3368 |
+
TCAGAG
|
3369 |
+
TCAGAT
|
3370 |
+
TCAGCA
|
3371 |
+
TCAGCC
|
3372 |
+
TCAGCG
|
3373 |
+
TCAGCT
|
3374 |
+
TCAGGA
|
3375 |
+
TCAGGC
|
3376 |
+
TCAGGG
|
3377 |
+
TCAGGT
|
3378 |
+
TCAGTA
|
3379 |
+
TCAGTC
|
3380 |
+
TCAGTG
|
3381 |
+
TCAGTT
|
3382 |
+
TCATAA
|
3383 |
+
TCATAC
|
3384 |
+
TCATAG
|
3385 |
+
TCATAT
|
3386 |
+
TCATCA
|
3387 |
+
TCATCC
|
3388 |
+
TCATCG
|
3389 |
+
TCATCT
|
3390 |
+
TCATGA
|
3391 |
+
TCATGC
|
3392 |
+
TCATGG
|
3393 |
+
TCATGT
|
3394 |
+
TCATTA
|
3395 |
+
TCATTC
|
3396 |
+
TCATTG
|
3397 |
+
TCATTT
|
3398 |
+
TCCAAA
|
3399 |
+
TCCAAC
|
3400 |
+
TCCAAG
|
3401 |
+
TCCAAT
|
3402 |
+
TCCACA
|
3403 |
+
TCCACC
|
3404 |
+
TCCACG
|
3405 |
+
TCCACT
|
3406 |
+
TCCAGA
|
3407 |
+
TCCAGC
|
3408 |
+
TCCAGG
|
3409 |
+
TCCAGT
|
3410 |
+
TCCATA
|
3411 |
+
TCCATC
|
3412 |
+
TCCATG
|
3413 |
+
TCCATT
|
3414 |
+
TCCCAA
|
3415 |
+
TCCCAC
|
3416 |
+
TCCCAG
|
3417 |
+
TCCCAT
|
3418 |
+
TCCCCA
|
3419 |
+
TCCCCC
|
3420 |
+
TCCCCG
|
3421 |
+
TCCCCT
|
3422 |
+
TCCCGA
|
3423 |
+
TCCCGC
|
3424 |
+
TCCCGG
|
3425 |
+
TCCCGT
|
3426 |
+
TCCCTA
|
3427 |
+
TCCCTC
|
3428 |
+
TCCCTG
|
3429 |
+
TCCCTT
|
3430 |
+
TCCGAA
|
3431 |
+
TCCGAC
|
3432 |
+
TCCGAG
|
3433 |
+
TCCGAT
|
3434 |
+
TCCGCA
|
3435 |
+
TCCGCC
|
3436 |
+
TCCGCG
|
3437 |
+
TCCGCT
|
3438 |
+
TCCGGA
|
3439 |
+
TCCGGC
|
3440 |
+
TCCGGG
|
3441 |
+
TCCGGT
|
3442 |
+
TCCGTA
|
3443 |
+
TCCGTC
|
3444 |
+
TCCGTG
|
3445 |
+
TCCGTT
|
3446 |
+
TCCTAA
|
3447 |
+
TCCTAC
|
3448 |
+
TCCTAG
|
3449 |
+
TCCTAT
|
3450 |
+
TCCTCA
|
3451 |
+
TCCTCC
|
3452 |
+
TCCTCG
|
3453 |
+
TCCTCT
|
3454 |
+
TCCTGA
|
3455 |
+
TCCTGC
|
3456 |
+
TCCTGG
|
3457 |
+
TCCTGT
|
3458 |
+
TCCTTA
|
3459 |
+
TCCTTC
|
3460 |
+
TCCTTG
|
3461 |
+
TCCTTT
|
3462 |
+
TCGAAA
|
3463 |
+
TCGAAC
|
3464 |
+
TCGAAG
|
3465 |
+
TCGAAT
|
3466 |
+
TCGACA
|
3467 |
+
TCGACC
|
3468 |
+
TCGACG
|
3469 |
+
TCGACT
|
3470 |
+
TCGAGA
|
3471 |
+
TCGAGC
|
3472 |
+
TCGAGG
|
3473 |
+
TCGAGT
|
3474 |
+
TCGATA
|
3475 |
+
TCGATC
|
3476 |
+
TCGATG
|
3477 |
+
TCGATT
|
3478 |
+
TCGCAA
|
3479 |
+
TCGCAC
|
3480 |
+
TCGCAG
|
3481 |
+
TCGCAT
|
3482 |
+
TCGCCA
|
3483 |
+
TCGCCC
|
3484 |
+
TCGCCG
|
3485 |
+
TCGCCT
|
3486 |
+
TCGCGA
|
3487 |
+
TCGCGC
|
3488 |
+
TCGCGG
|
3489 |
+
TCGCGT
|
3490 |
+
TCGCTA
|
3491 |
+
TCGCTC
|
3492 |
+
TCGCTG
|
3493 |
+
TCGCTT
|
3494 |
+
TCGGAA
|
3495 |
+
TCGGAC
|
3496 |
+
TCGGAG
|
3497 |
+
TCGGAT
|
3498 |
+
TCGGCA
|
3499 |
+
TCGGCC
|
3500 |
+
TCGGCG
|
3501 |
+
TCGGCT
|
3502 |
+
TCGGGA
|
3503 |
+
TCGGGC
|
3504 |
+
TCGGGG
|
3505 |
+
TCGGGT
|
3506 |
+
TCGGTA
|
3507 |
+
TCGGTC
|
3508 |
+
TCGGTG
|
3509 |
+
TCGGTT
|
3510 |
+
TCGTAA
|
3511 |
+
TCGTAC
|
3512 |
+
TCGTAG
|
3513 |
+
TCGTAT
|
3514 |
+
TCGTCA
|
3515 |
+
TCGTCC
|
3516 |
+
TCGTCG
|
3517 |
+
TCGTCT
|
3518 |
+
TCGTGA
|
3519 |
+
TCGTGC
|
3520 |
+
TCGTGG
|
3521 |
+
TCGTGT
|
3522 |
+
TCGTTA
|
3523 |
+
TCGTTC
|
3524 |
+
TCGTTG
|
3525 |
+
TCGTTT
|
3526 |
+
TCTAAA
|
3527 |
+
TCTAAC
|
3528 |
+
TCTAAG
|
3529 |
+
TCTAAT
|
3530 |
+
TCTACA
|
3531 |
+
TCTACC
|
3532 |
+
TCTACG
|
3533 |
+
TCTACT
|
3534 |
+
TCTAGA
|
3535 |
+
TCTAGC
|
3536 |
+
TCTAGG
|
3537 |
+
TCTAGT
|
3538 |
+
TCTATA
|
3539 |
+
TCTATC
|
3540 |
+
TCTATG
|
3541 |
+
TCTATT
|
3542 |
+
TCTCAA
|
3543 |
+
TCTCAC
|
3544 |
+
TCTCAG
|
3545 |
+
TCTCAT
|
3546 |
+
TCTCCA
|
3547 |
+
TCTCCC
|
3548 |
+
TCTCCG
|
3549 |
+
TCTCCT
|
3550 |
+
TCTCGA
|
3551 |
+
TCTCGC
|
3552 |
+
TCTCGG
|
3553 |
+
TCTCGT
|
3554 |
+
TCTCTA
|
3555 |
+
TCTCTC
|
3556 |
+
TCTCTG
|
3557 |
+
TCTCTT
|
3558 |
+
TCTGAA
|
3559 |
+
TCTGAC
|
3560 |
+
TCTGAG
|
3561 |
+
TCTGAT
|
3562 |
+
TCTGCA
|
3563 |
+
TCTGCC
|
3564 |
+
TCTGCG
|
3565 |
+
TCTGCT
|
3566 |
+
TCTGGA
|
3567 |
+
TCTGGC
|
3568 |
+
TCTGGG
|
3569 |
+
TCTGGT
|
3570 |
+
TCTGTA
|
3571 |
+
TCTGTC
|
3572 |
+
TCTGTG
|
3573 |
+
TCTGTT
|
3574 |
+
TCTTAA
|
3575 |
+
TCTTAC
|
3576 |
+
TCTTAG
|
3577 |
+
TCTTAT
|
3578 |
+
TCTTCA
|
3579 |
+
TCTTCC
|
3580 |
+
TCTTCG
|
3581 |
+
TCTTCT
|
3582 |
+
TCTTGA
|
3583 |
+
TCTTGC
|
3584 |
+
TCTTGG
|
3585 |
+
TCTTGT
|
3586 |
+
TCTTTA
|
3587 |
+
TCTTTC
|
3588 |
+
TCTTTG
|
3589 |
+
TCTTTT
|
3590 |
+
TGAAAA
|
3591 |
+
TGAAAC
|
3592 |
+
TGAAAG
|
3593 |
+
TGAAAT
|
3594 |
+
TGAACA
|
3595 |
+
TGAACC
|
3596 |
+
TGAACG
|
3597 |
+
TGAACT
|
3598 |
+
TGAAGA
|
3599 |
+
TGAAGC
|
3600 |
+
TGAAGG
|
3601 |
+
TGAAGT
|
3602 |
+
TGAATA
|
3603 |
+
TGAATC
|
3604 |
+
TGAATG
|
3605 |
+
TGAATT
|
3606 |
+
TGACAA
|
3607 |
+
TGACAC
|
3608 |
+
TGACAG
|
3609 |
+
TGACAT
|
3610 |
+
TGACCA
|
3611 |
+
TGACCC
|
3612 |
+
TGACCG
|
3613 |
+
TGACCT
|
3614 |
+
TGACGA
|
3615 |
+
TGACGC
|
3616 |
+
TGACGG
|
3617 |
+
TGACGT
|
3618 |
+
TGACTA
|
3619 |
+
TGACTC
|
3620 |
+
TGACTG
|
3621 |
+
TGACTT
|
3622 |
+
TGAGAA
|
3623 |
+
TGAGAC
|
3624 |
+
TGAGAG
|
3625 |
+
TGAGAT
|
3626 |
+
TGAGCA
|
3627 |
+
TGAGCC
|
3628 |
+
TGAGCG
|
3629 |
+
TGAGCT
|
3630 |
+
TGAGGA
|
3631 |
+
TGAGGC
|
3632 |
+
TGAGGG
|
3633 |
+
TGAGGT
|
3634 |
+
TGAGTA
|
3635 |
+
TGAGTC
|
3636 |
+
TGAGTG
|
3637 |
+
TGAGTT
|
3638 |
+
TGATAA
|
3639 |
+
TGATAC
|
3640 |
+
TGATAG
|
3641 |
+
TGATAT
|
3642 |
+
TGATCA
|
3643 |
+
TGATCC
|
3644 |
+
TGATCG
|
3645 |
+
TGATCT
|
3646 |
+
TGATGA
|
3647 |
+
TGATGC
|
3648 |
+
TGATGG
|
3649 |
+
TGATGT
|
3650 |
+
TGATTA
|
3651 |
+
TGATTC
|
3652 |
+
TGATTG
|
3653 |
+
TGATTT
|
3654 |
+
TGCAAA
|
3655 |
+
TGCAAC
|
3656 |
+
TGCAAG
|
3657 |
+
TGCAAT
|
3658 |
+
TGCACA
|
3659 |
+
TGCACC
|
3660 |
+
TGCACG
|
3661 |
+
TGCACT
|
3662 |
+
TGCAGA
|
3663 |
+
TGCAGC
|
3664 |
+
TGCAGG
|
3665 |
+
TGCAGT
|
3666 |
+
TGCATA
|
3667 |
+
TGCATC
|
3668 |
+
TGCATG
|
3669 |
+
TGCATT
|
3670 |
+
TGCCAA
|
3671 |
+
TGCCAC
|
3672 |
+
TGCCAG
|
3673 |
+
TGCCAT
|
3674 |
+
TGCCCA
|
3675 |
+
TGCCCC
|
3676 |
+
TGCCCG
|
3677 |
+
TGCCCT
|
3678 |
+
TGCCGA
|
3679 |
+
TGCCGC
|
3680 |
+
TGCCGG
|
3681 |
+
TGCCGT
|
3682 |
+
TGCCTA
|
3683 |
+
TGCCTC
|
3684 |
+
TGCCTG
|
3685 |
+
TGCCTT
|
3686 |
+
TGCGAA
|
3687 |
+
TGCGAC
|
3688 |
+
TGCGAG
|
3689 |
+
TGCGAT
|
3690 |
+
TGCGCA
|
3691 |
+
TGCGCC
|
3692 |
+
TGCGCG
|
3693 |
+
TGCGCT
|
3694 |
+
TGCGGA
|
3695 |
+
TGCGGC
|
3696 |
+
TGCGGG
|
3697 |
+
TGCGGT
|
3698 |
+
TGCGTA
|
3699 |
+
TGCGTC
|
3700 |
+
TGCGTG
|
3701 |
+
TGCGTT
|
3702 |
+
TGCTAA
|
3703 |
+
TGCTAC
|
3704 |
+
TGCTAG
|
3705 |
+
TGCTAT
|
3706 |
+
TGCTCA
|
3707 |
+
TGCTCC
|
3708 |
+
TGCTCG
|
3709 |
+
TGCTCT
|
3710 |
+
TGCTGA
|
3711 |
+
TGCTGC
|
3712 |
+
TGCTGG
|
3713 |
+
TGCTGT
|
3714 |
+
TGCTTA
|
3715 |
+
TGCTTC
|
3716 |
+
TGCTTG
|
3717 |
+
TGCTTT
|
3718 |
+
TGGAAA
|
3719 |
+
TGGAAC
|
3720 |
+
TGGAAG
|
3721 |
+
TGGAAT
|
3722 |
+
TGGACA
|
3723 |
+
TGGACC
|
3724 |
+
TGGACG
|
3725 |
+
TGGACT
|
3726 |
+
TGGAGA
|
3727 |
+
TGGAGC
|
3728 |
+
TGGAGG
|
3729 |
+
TGGAGT
|
3730 |
+
TGGATA
|
3731 |
+
TGGATC
|
3732 |
+
TGGATG
|
3733 |
+
TGGATT
|
3734 |
+
TGGCAA
|
3735 |
+
TGGCAC
|
3736 |
+
TGGCAG
|
3737 |
+
TGGCAT
|
3738 |
+
TGGCCA
|
3739 |
+
TGGCCC
|
3740 |
+
TGGCCG
|
3741 |
+
TGGCCT
|
3742 |
+
TGGCGA
|
3743 |
+
TGGCGC
|
3744 |
+
TGGCGG
|
3745 |
+
TGGCGT
|
3746 |
+
TGGCTA
|
3747 |
+
TGGCTC
|
3748 |
+
TGGCTG
|
3749 |
+
TGGCTT
|
3750 |
+
TGGGAA
|
3751 |
+
TGGGAC
|
3752 |
+
TGGGAG
|
3753 |
+
TGGGAT
|
3754 |
+
TGGGCA
|
3755 |
+
TGGGCC
|
3756 |
+
TGGGCG
|
3757 |
+
TGGGCT
|
3758 |
+
TGGGGA
|
3759 |
+
TGGGGC
|
3760 |
+
TGGGGG
|
3761 |
+
TGGGGT
|
3762 |
+
TGGGTA
|
3763 |
+
TGGGTC
|
3764 |
+
TGGGTG
|
3765 |
+
TGGGTT
|
3766 |
+
TGGTAA
|
3767 |
+
TGGTAC
|
3768 |
+
TGGTAG
|
3769 |
+
TGGTAT
|
3770 |
+
TGGTCA
|
3771 |
+
TGGTCC
|
3772 |
+
TGGTCG
|
3773 |
+
TGGTCT
|
3774 |
+
TGGTGA
|
3775 |
+
TGGTGC
|
3776 |
+
TGGTGG
|
3777 |
+
TGGTGT
|
3778 |
+
TGGTTA
|
3779 |
+
TGGTTC
|
3780 |
+
TGGTTG
|
3781 |
+
TGGTTT
|
3782 |
+
TGTAAA
|
3783 |
+
TGTAAC
|
3784 |
+
TGTAAG
|
3785 |
+
TGTAAT
|
3786 |
+
TGTACA
|
3787 |
+
TGTACC
|
3788 |
+
TGTACG
|
3789 |
+
TGTACT
|
3790 |
+
TGTAGA
|
3791 |
+
TGTAGC
|
3792 |
+
TGTAGG
|
3793 |
+
TGTAGT
|
3794 |
+
TGTATA
|
3795 |
+
TGTATC
|
3796 |
+
TGTATG
|
3797 |
+
TGTATT
|
3798 |
+
TGTCAA
|
3799 |
+
TGTCAC
|
3800 |
+
TGTCAG
|
3801 |
+
TGTCAT
|
3802 |
+
TGTCCA
|
3803 |
+
TGTCCC
|
3804 |
+
TGTCCG
|
3805 |
+
TGTCCT
|
3806 |
+
TGTCGA
|
3807 |
+
TGTCGC
|
3808 |
+
TGTCGG
|
3809 |
+
TGTCGT
|
3810 |
+
TGTCTA
|
3811 |
+
TGTCTC
|
3812 |
+
TGTCTG
|
3813 |
+
TGTCTT
|
3814 |
+
TGTGAA
|
3815 |
+
TGTGAC
|
3816 |
+
TGTGAG
|
3817 |
+
TGTGAT
|
3818 |
+
TGTGCA
|
3819 |
+
TGTGCC
|
3820 |
+
TGTGCG
|
3821 |
+
TGTGCT
|
3822 |
+
TGTGGA
|
3823 |
+
TGTGGC
|
3824 |
+
TGTGGG
|
3825 |
+
TGTGGT
|
3826 |
+
TGTGTA
|
3827 |
+
TGTGTC
|
3828 |
+
TGTGTG
|
3829 |
+
TGTGTT
|
3830 |
+
TGTTAA
|
3831 |
+
TGTTAC
|
3832 |
+
TGTTAG
|
3833 |
+
TGTTAT
|
3834 |
+
TGTTCA
|
3835 |
+
TGTTCC
|
3836 |
+
TGTTCG
|
3837 |
+
TGTTCT
|
3838 |
+
TGTTGA
|
3839 |
+
TGTTGC
|
3840 |
+
TGTTGG
|
3841 |
+
TGTTGT
|
3842 |
+
TGTTTA
|
3843 |
+
TGTTTC
|
3844 |
+
TGTTTG
|
3845 |
+
TGTTTT
|
3846 |
+
TTAAAA
|
3847 |
+
TTAAAC
|
3848 |
+
TTAAAG
|
3849 |
+
TTAAAT
|
3850 |
+
TTAACA
|
3851 |
+
TTAACC
|
3852 |
+
TTAACG
|
3853 |
+
TTAACT
|
3854 |
+
TTAAGA
|
3855 |
+
TTAAGC
|
3856 |
+
TTAAGG
|
3857 |
+
TTAAGT
|
3858 |
+
TTAATA
|
3859 |
+
TTAATC
|
3860 |
+
TTAATG
|
3861 |
+
TTAATT
|
3862 |
+
TTACAA
|
3863 |
+
TTACAC
|
3864 |
+
TTACAG
|
3865 |
+
TTACAT
|
3866 |
+
TTACCA
|
3867 |
+
TTACCC
|
3868 |
+
TTACCG
|
3869 |
+
TTACCT
|
3870 |
+
TTACGA
|
3871 |
+
TTACGC
|
3872 |
+
TTACGG
|
3873 |
+
TTACGT
|
3874 |
+
TTACTA
|
3875 |
+
TTACTC
|
3876 |
+
TTACTG
|
3877 |
+
TTACTT
|
3878 |
+
TTAGAA
|
3879 |
+
TTAGAC
|
3880 |
+
TTAGAG
|
3881 |
+
TTAGAT
|
3882 |
+
TTAGCA
|
3883 |
+
TTAGCC
|
3884 |
+
TTAGCG
|
3885 |
+
TTAGCT
|
3886 |
+
TTAGGA
|
3887 |
+
TTAGGC
|
3888 |
+
TTAGGG
|
3889 |
+
TTAGGT
|
3890 |
+
TTAGTA
|
3891 |
+
TTAGTC
|
3892 |
+
TTAGTG
|
3893 |
+
TTAGTT
|
3894 |
+
TTATAA
|
3895 |
+
TTATAC
|
3896 |
+
TTATAG
|
3897 |
+
TTATAT
|
3898 |
+
TTATCA
|
3899 |
+
TTATCC
|
3900 |
+
TTATCG
|
3901 |
+
TTATCT
|
3902 |
+
TTATGA
|
3903 |
+
TTATGC
|
3904 |
+
TTATGG
|
3905 |
+
TTATGT
|
3906 |
+
TTATTA
|
3907 |
+
TTATTC
|
3908 |
+
TTATTG
|
3909 |
+
TTATTT
|
3910 |
+
TTCAAA
|
3911 |
+
TTCAAC
|
3912 |
+
TTCAAG
|
3913 |
+
TTCAAT
|
3914 |
+
TTCACA
|
3915 |
+
TTCACC
|
3916 |
+
TTCACG
|
3917 |
+
TTCACT
|
3918 |
+
TTCAGA
|
3919 |
+
TTCAGC
|
3920 |
+
TTCAGG
|
3921 |
+
TTCAGT
|
3922 |
+
TTCATA
|
3923 |
+
TTCATC
|
3924 |
+
TTCATG
|
3925 |
+
TTCATT
|
3926 |
+
TTCCAA
|
3927 |
+
TTCCAC
|
3928 |
+
TTCCAG
|
3929 |
+
TTCCAT
|
3930 |
+
TTCCCA
|
3931 |
+
TTCCCC
|
3932 |
+
TTCCCG
|
3933 |
+
TTCCCT
|
3934 |
+
TTCCGA
|
3935 |
+
TTCCGC
|
3936 |
+
TTCCGG
|
3937 |
+
TTCCGT
|
3938 |
+
TTCCTA
|
3939 |
+
TTCCTC
|
3940 |
+
TTCCTG
|
3941 |
+
TTCCTT
|
3942 |
+
TTCGAA
|
3943 |
+
TTCGAC
|
3944 |
+
TTCGAG
|
3945 |
+
TTCGAT
|
3946 |
+
TTCGCA
|
3947 |
+
TTCGCC
|
3948 |
+
TTCGCG
|
3949 |
+
TTCGCT
|
3950 |
+
TTCGGA
|
3951 |
+
TTCGGC
|
3952 |
+
TTCGGG
|
3953 |
+
TTCGGT
|
3954 |
+
TTCGTA
|
3955 |
+
TTCGTC
|
3956 |
+
TTCGTG
|
3957 |
+
TTCGTT
|
3958 |
+
TTCTAA
|
3959 |
+
TTCTAC
|
3960 |
+
TTCTAG
|
3961 |
+
TTCTAT
|
3962 |
+
TTCTCA
|
3963 |
+
TTCTCC
|
3964 |
+
TTCTCG
|
3965 |
+
TTCTCT
|
3966 |
+
TTCTGA
|
3967 |
+
TTCTGC
|
3968 |
+
TTCTGG
|
3969 |
+
TTCTGT
|
3970 |
+
TTCTTA
|
3971 |
+
TTCTTC
|
3972 |
+
TTCTTG
|
3973 |
+
TTCTTT
|
3974 |
+
TTGAAA
|
3975 |
+
TTGAAC
|
3976 |
+
TTGAAG
|
3977 |
+
TTGAAT
|
3978 |
+
TTGACA
|
3979 |
+
TTGACC
|
3980 |
+
TTGACG
|
3981 |
+
TTGACT
|
3982 |
+
TTGAGA
|
3983 |
+
TTGAGC
|
3984 |
+
TTGAGG
|
3985 |
+
TTGAGT
|
3986 |
+
TTGATA
|
3987 |
+
TTGATC
|
3988 |
+
TTGATG
|
3989 |
+
TTGATT
|
3990 |
+
TTGCAA
|
3991 |
+
TTGCAC
|
3992 |
+
TTGCAG
|
3993 |
+
TTGCAT
|
3994 |
+
TTGCCA
|
3995 |
+
TTGCCC
|
3996 |
+
TTGCCG
|
3997 |
+
TTGCCT
|
3998 |
+
TTGCGA
|
3999 |
+
TTGCGC
|
4000 |
+
TTGCGG
|
4001 |
+
TTGCGT
|
4002 |
+
TTGCTA
|
4003 |
+
TTGCTC
|
4004 |
+
TTGCTG
|
4005 |
+
TTGCTT
|
4006 |
+
TTGGAA
|
4007 |
+
TTGGAC
|
4008 |
+
TTGGAG
|
4009 |
+
TTGGAT
|
4010 |
+
TTGGCA
|
4011 |
+
TTGGCC
|
4012 |
+
TTGGCG
|
4013 |
+
TTGGCT
|
4014 |
+
TTGGGA
|
4015 |
+
TTGGGC
|
4016 |
+
TTGGGG
|
4017 |
+
TTGGGT
|
4018 |
+
TTGGTA
|
4019 |
+
TTGGTC
|
4020 |
+
TTGGTG
|
4021 |
+
TTGGTT
|
4022 |
+
TTGTAA
|
4023 |
+
TTGTAC
|
4024 |
+
TTGTAG
|
4025 |
+
TTGTAT
|
4026 |
+
TTGTCA
|
4027 |
+
TTGTCC
|
4028 |
+
TTGTCG
|
4029 |
+
TTGTCT
|
4030 |
+
TTGTGA
|
4031 |
+
TTGTGC
|
4032 |
+
TTGTGG
|
4033 |
+
TTGTGT
|
4034 |
+
TTGTTA
|
4035 |
+
TTGTTC
|
4036 |
+
TTGTTG
|
4037 |
+
TTGTTT
|
4038 |
+
TTTAAA
|
4039 |
+
TTTAAC
|
4040 |
+
TTTAAG
|
4041 |
+
TTTAAT
|
4042 |
+
TTTACA
|
4043 |
+
TTTACC
|
4044 |
+
TTTACG
|
4045 |
+
TTTACT
|
4046 |
+
TTTAGA
|
4047 |
+
TTTAGC
|
4048 |
+
TTTAGG
|
4049 |
+
TTTAGT
|
4050 |
+
TTTATA
|
4051 |
+
TTTATC
|
4052 |
+
TTTATG
|
4053 |
+
TTTATT
|
4054 |
+
TTTCAA
|
4055 |
+
TTTCAC
|
4056 |
+
TTTCAG
|
4057 |
+
TTTCAT
|
4058 |
+
TTTCCA
|
4059 |
+
TTTCCC
|
4060 |
+
TTTCCG
|
4061 |
+
TTTCCT
|
4062 |
+
TTTCGA
|
4063 |
+
TTTCGC
|
4064 |
+
TTTCGG
|
4065 |
+
TTTCGT
|
4066 |
+
TTTCTA
|
4067 |
+
TTTCTC
|
4068 |
+
TTTCTG
|
4069 |
+
TTTCTT
|
4070 |
+
TTTGAA
|
4071 |
+
TTTGAC
|
4072 |
+
TTTGAG
|
4073 |
+
TTTGAT
|
4074 |
+
TTTGCA
|
4075 |
+
TTTGCC
|
4076 |
+
TTTGCG
|
4077 |
+
TTTGCT
|
4078 |
+
TTTGGA
|
4079 |
+
TTTGGC
|
4080 |
+
TTTGGG
|
4081 |
+
TTTGGT
|
4082 |
+
TTTGTA
|
4083 |
+
TTTGTC
|
4084 |
+
TTTGTG
|
4085 |
+
TTTGTT
|
4086 |
+
TTTTAA
|
4087 |
+
TTTTAC
|
4088 |
+
TTTTAG
|
4089 |
+
TTTTAT
|
4090 |
+
TTTTCA
|
4091 |
+
TTTTCC
|
4092 |
+
TTTTCG
|
4093 |
+
TTTTCT
|
4094 |
+
TTTTGA
|
4095 |
+
TTTTGC
|
4096 |
+
TTTTGG
|
4097 |
+
TTTTGT
|
4098 |
+
TTTTTA
|
4099 |
+
TTTTTC
|
4100 |
+
TTTTTG
|
4101 |
+
TTTTTT
|
data/prokbert_vocabs/prokbert-base-dna7/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/prokbert_vocabs/prokbert-base-dna8/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/prokbert_vocabs/prokbert-base-dna9/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
general_utils.py
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
|
3 |
+
import pandas as pd
|
4 |
+
import os
|
5 |
+
import numpy as np
|
6 |
+
import subprocess
|
7 |
+
import shutil
|
8 |
+
""" Library for general utils, such as dataframe properties checking,
|
9 |
+
creating directories, checking files, etc.
|
10 |
+
"""
|
11 |
+
|
12 |
+
|
13 |
+
def check_expected_columns(df: pd.DataFrame, expected_columns: list) -> bool:
    """
    Verify that every expected column name is present in a DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame whose columns are inspected.
    expected_columns : list
        Column names that must all appear in ``df.columns``.

    Returns
    -------
    bool
        Always True when the function returns normally; a missing column is
        reported by raising, never by returning False.

    Raises
    ------
    ValueError
        If at least one expected column is absent. The message lists the
        absent names in the order they occur in ``expected_columns``.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    >>> check_expected_columns(df, ['A', 'B'])
    True

    >>> check_expected_columns(df, ['A', 'C'])
    ValueError: The following columns are missing: ['C']
    """
    absent = []
    for name in expected_columns:
        if name not in df.columns:
            absent.append(name)

    if absent:
        raise ValueError(f"The following columns are missing: {absent}")
    return True
|
50 |
+
|
51 |
+
|
52 |
+
def is_valid_primary_key(df: pd.DataFrame, column_name: str) -> bool:
    """
    Tell whether a DataFrame column could be used as a primary key.

    A column qualifies when it contains no null values and all of its
    values are distinct.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame that holds the column.
    column_name : str
        Name of the column to test.

    Returns
    -------
    bool
        True if the column has no nulls and no duplicates, False otherwise.

    Raises
    ------
    ValueError
        If ``column_name`` is not a column of ``df``.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    >>> is_valid_primary_key(df, 'A')
    True

    >>> df = pd.DataFrame({'A': [1, 2, 2], 'B': [4, 5, 6]})
    >>> is_valid_primary_key(df, 'A')
    False
    """
    if column_name not in df.columns:
        raise ValueError(f"Column '{column_name}' does not exist in the DataFrame.")

    candidate = df[column_name]

    # Null check first, uniqueness second (same order as before).
    has_missing = bool(candidate.isnull().any())
    return (not has_missing) and bool(candidate.is_unique)
|
96 |
+
|
97 |
+
def get_non_empty_files(start_path: str, extensions: tuple = ('.fasta', '.fna')) -> str:
    """
    Generator over the non-empty files below a directory that carry one of the given extensions.

    The directory tree is walked with ``os.walk``. A file matches when its name ends
    with one of ``extensions`` or with that extension followed by '.gz'.

    :param start_path: Directory at which the recursive search starts.
    :type start_path: str

    :param extensions: File extensions to accept (default is ('.fasta', '.fna')).
    :type extensions: tuple

    :return: Yields the bare file name (not the full path) of every matching file
             whose size is greater than zero bytes.
    :rtype: str
    """
    # Plain and gzip-compressed variants of every extension, checked in one call.
    accepted = tuple(extensions) + tuple(ext + '.gz' for ext in extensions)

    for folder, _, names in os.walk(start_path):
        for filename in names:
            if not filename.endswith(accepted):
                continue
            if os.path.getsize(os.path.join(folder, filename)) > 0:
                yield filename
|
117 |
+
|
118 |
+
|
119 |
+
|
120 |
+
def truncate_zero_columns(arr: np.ndarray) -> np.ndarray:
    """
    Truncate all trailing columns composed entirely of zeros in a given 2D numpy array.

    :param arr: Input 2D numpy array.
    :type arr: np.ndarray

    :return: ``arr`` sliced up to and including its last column that contains a
             non-zero value. If every column is zero (or there are no columns),
             an array of shape ``(arr.shape[0], 0)`` with the same dtype as ``arr``.
    :rtype: np.ndarray
    """
    # Indices of the columns holding at least one truthy (non-zero) entry.
    nonzero_columns = np.flatnonzero(np.any(arr, axis=0))

    if nonzero_columns.size == 0:
        # Slice instead of np.empty(...) so the result keeps the input dtype.
        return arr[:, :0]

    last_nonzero = int(nonzero_columns[-1])
    return arr[:, :last_nonzero + 1]
|
136 |
+
|
137 |
+
|
138 |
+
import os
|
139 |
+
|
140 |
+
def create_directory_for_filepath(filepath: str) -> None:
    """
    Make sure the parent directory of a file path exists, creating it when necessary.

    Nothing is done when the path has no directory component or when that
    directory already exists.

    Args:
        filepath (str): Path of the file whose parent directory structure is required.

    Raises:
        ValueError: If ``filepath`` is empty or None.
        OSError: If the directory structure cannot be created.
    """
    if not filepath:
        raise ValueError("The provided filepath is empty or None.")

    parent = os.path.dirname(filepath)
    if not parent or os.path.exists(parent):
        return

    try:
        os.makedirs(parent)
        print(f"Directory structure {parent} created successfully.")
    except OSError as e:
        raise OSError(f"Error creating directory structure {parent}. Error: {e}")
|
163 |
+
|
164 |
+
# Example usage:
|
165 |
+
# create_directory_for_filepath("/path/to/directory/that/might/not/exist/filename.txt")
|
166 |
+
|
167 |
+
def check_file_exists(file_path: str) -> bool:
    """
    Assert that a path exists on disk.

    Args:
        file_path (str): Path to check.

    Returns:
        bool: True when the path exists.

    Raises:
        ValueError: If the path does not exist.
    """
    if not os.path.exists(file_path):
        raise ValueError(f"The provided file path '{file_path}' does not exist.")
    return True
|
181 |
+
|
182 |
+
def count_gpus():
    """
    Count the GPUs visible on this machine.

    The total is the sum of two sources:

    * the number of CUDA devices reported by ``torch.cuda.device_count()``;
    * the number of times ``'Device Type: GPU'`` occurs in the output of the
      ``clinfo`` command (intended to cover AMD GPUs).

    If ``clinfo`` is missing, fails, or prints output that is not valid UTF-8,
    the second count is treated as zero.

    NOTE(review): clinfo reports every OpenCL device, so GPUs that are visible
    both to CUDA and to OpenCL may be counted twice - confirm on a mixed setup.

    Returns:
        int: Total number of GPUs detected.
    """
    # Imported lazily: torch is only needed when this function is called.
    import torch

    nvidia_gpu_count = torch.cuda.device_count()

    amd_gpu_count = 0
    try:
        clinfo_output = subprocess.check_output('clinfo').decode('utf-8')
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        # Best effort: clinfo may be absent or may exit with an error.
        pass
    else:
        amd_gpu_count = clinfo_output.count('Device Type: GPU')

    return nvidia_gpu_count + amd_gpu_count
|
198 |
+
|
199 |
+
|
200 |
+
def create_hard_links(source_directory: str, target_directory: str, blacklist: list = None) -> str:
    """
    Creates hard links for the files of the source directory inside the target directory.

    Only the direct children of ``source_directory`` are considered. Entries are
    skipped when their name starts with '.' or '_', when they are directories, or
    when their name is listed in ``blacklist``. A link that already exists in the
    target directory is reported and skipped, so the function can be re-run.

    Args:
        source_directory (str): The directory containing the original files.
        target_directory (str): The directory where hard links will be created.
            It is created if it does not exist.
        blacklist (list): Filenames to exclude from linking. Defaults to no exclusions.

    Returns:
        str: A message naming the target and source directories.

    Raises:
        ValueError: If ``source_directory`` does not exist.
    """
    # None instead of a mutable [] default; an empty tuple means "exclude nothing".
    excluded = blacklist if blacklist is not None else ()

    if not os.path.exists(source_directory):
        raise ValueError(f"The source directory '{source_directory}' does not exist.")
    os.makedirs(target_directory, exist_ok=True)

    for filename in os.listdir(source_directory):
        source_file_path = os.path.join(source_directory, filename)
        target_file_path = os.path.join(target_directory, filename)

        # Hidden/private entries, sub-directories and blacklisted names are not linked.
        if (filename.startswith(('.', '_')) or
                os.path.isdir(source_file_path) or
                filename in excluded):
            continue

        try:
            os.link(source_file_path, target_file_path)
        except FileExistsError:
            # Same policy as create_selected_hard_links: keep the existing link.
            print(f"The target hard link {target_file_path} already exists. Skipping...")

    return f"Hard links created in {target_directory} from {source_directory}."
|
235 |
+
|
236 |
+
# Example usage
|
237 |
+
# create_hard_links("/path/to/source_directory", "/path/to/target_directory", blacklist=["file_to_skip.txt"])
|
238 |
+
|
239 |
+
def create_selected_hard_links(source_directory: str, target_directory: str, filenames: list) -> str:
    """
    Creates hard links for the specified files from the source directory to the target directory.

    Names that are not regular files in the source directory are reported and
    skipped, as are links that already exist in the target directory.

    Args:
        source_directory (str): The directory containing the original files.
        target_directory (str): The directory where hard links will be created.
            It is created if it does not exist.
        filenames (list): List of filenames for which hard links should be created.

    Returns:
        str: A message naming the target and source directories.

    Raises:
        ValueError: If ``source_directory`` does not exist.
    """
    if not os.path.exists(source_directory):
        raise ValueError(f"The source directory '{source_directory}' does not exist.")
    os.makedirs(target_directory, exist_ok=True)

    for filename in filenames:
        source_file_path = os.path.join(source_directory, filename)
        target_file_path = os.path.join(target_directory, filename)

        # Only existing regular files can be linked.
        if not os.path.isfile(source_file_path):
            print(f"Warning: {filename} does not exist in the source directory. Skipping.")
            continue

        try:
            os.link(source_file_path, target_file_path)
        except FileExistsError:
            print(f'The target hard link {target_file_path} exist. Skipping...')

    return f"Hard links for specified files created in {target_directory} from {source_directory}."
|
275 |
+
|
276 |
+
def remove_hidden_files(directory: str) -> None:
    """
    Removes all files recursively in a folder that start with '.' or '_'.

    Sub-directories whose own name starts with '.' or '_' are not entered, so
    their contents are left untouched. The path of every removed file is printed.

    Args:
        directory (str): The directory from which hidden files should be removed.

    Returns:
        None

    Raises:
        ValueError: If ``directory`` does not exist.
    """
    hidden_prefixes = ('.', '_')

    if not os.path.exists(directory):
        raise ValueError(f"The directory '{directory}' does not exist.")

    # The walk must be top-down: os.walk only honours in-place edits of
    # `dirnames` in that mode, so pruning hidden directories works here.
    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):

        # Do not descend into directories starting with '.' or '_'
        dirnames[:] = [d for d in dirnames if not d.startswith(hidden_prefixes)]

        for filename in filenames:
            if filename.startswith(hidden_prefixes):
                file_path = os.path.join(dirpath, filename)
                os.remove(file_path)
                print(f"Removed: {file_path}")

    print(f"All hidden files removed from {directory}.")
|
prokbert_tokenizer.py
ADDED
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/tokenization_bert.py
|
3 |
+
|
4 |
+
|
5 |
+
# ProkBERT tokenizer stuff
|
6 |
+
|
7 |
+
import collections
|
8 |
+
import os
|
9 |
+
import unicodedata
|
10 |
+
from typing import List, Optional, Tuple, Union
|
11 |
+
from copy import deepcopy
|
12 |
+
from transformers import PreTrainedTokenizer
|
13 |
+
from transformers.tokenization_utils import _is_control, _is_punctuation, _is_whitespace
|
14 |
+
from transformers.utils import logging
|
15 |
+
|
16 |
+
# These utils contains the tools needed by the ProkBERT tokenizer
|
17 |
+
|
18 |
+
from config_utils import *
|
19 |
+
from sequtils import *
|
20 |
+
|
21 |
+
import logging as logger
|
22 |
+
|
23 |
+
#logger = logging.get_logger(__name__)
|
24 |
+
|
25 |
+
# File name under which the vocabulary is stored.
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

# models prokbert-mini-k6s1, prokbert-large-k6s2, prokbert-large-k6s1


# Vocabulary file (relative path) for each model name; all three listed
# models point at the same prokbert-base-dna6 vocabulary.
PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {
        "prokbert-mini-k6s1": "prokbert-base-dna6/vocab.txt",
        "prokbert-large-k6s1": "prokbert-base-dna6/vocab.txt",
        "prokbert-large-k6s2": "prokbert-base-dna6/vocab.txt"
    }
}


# Size associated with each model name; used below as the tokenizer's
# `max_model_input_sizes`.
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
    "prokbert-mini-k6s1": 1024,
    "prokbert-large-k6s1": 1024,
    "prokbert-large-k6s2": 1024
}

# Per-model initialisation options; used below as the tokenizer's
# `pretrained_init_configuration`.
PRETRAINED_INIT_CONFIGURATION = {
    "prokbert-mini-k6s1": {"do_upper_case": True},
    "prokbert-large-k6s1": {"do_upper_case": True},
    "prokbert-large-k6s2": {"do_upper_case": True}

}
|
51 |
+
|
52 |
+
|
53 |
+
def load_vocab(vocab_file):
    """Read a vocabulary file (one token per line) into an ordered token -> index mapping.

    The index of a token is its zero-based line number. Only the trailing
    newline is stripped from each line. If a token occurs more than once,
    the index of its last occurrence is kept.
    """
    with open(vocab_file, "r", encoding="utf-8") as reader:
        lines = reader.readlines()

    return collections.OrderedDict(
        (line.rstrip("\n"), position) for position, line in enumerate(lines)
    )
|
62 |
+
|
63 |
+
|
64 |
+
class ProkBERTTokenizer(PreTrainedTokenizer):
    """Custom tokenizer for ProkBERT.

    DNA segments are converted to (overlapping) k-mer tokens by the project's
    LCA tokenization helpers. The tokenizer works in one of two operation
    spaces, selected at construction time:

    * ``'sequence'``: ids are decoded back to nucleotide fragments, so that
      joining the decoded tokens gives a sequence string;
    * any other value (``'kmer'``): ids are decoded to the plain vocabulary
      tokens.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    # Plain nucleotide alphabet, and the same alphabet extended with the
    # '*' character used for masked positions in sequence space.
    nucleotide_abc = {'A', 'T', 'C', 'G'}
    extended_nucleotide_abc = {'A', 'T', 'C', 'G', '*'}
    # Character standing for an unknown nucleotide in sequence space.
    sequence_unk_token = 'N'
    default_unk_token = "[UNK]"
    default_sep_token = "[SEP]"
    default_pad_token = "[PAD]"
    default_cls_token = "[CLS]"
    default_mask_token = "[MASK]"

    def __init__(self,
                 tokenization_params: Optional[Dict] = None,
                 segmentation_params: Optional[Dict] = None,
                 comp_params: Optional[Dict] = None,
                 operation_space: str = 'sequence',
                 **kwargs):
        """Initialize the ProkBERT tokenizer.

        Args:
            tokenization_params (Dict, optional): Tokenization parameters. Defaults to an empty dict.
            segmentation_params (Dict, optional): Segmentation parameters. Defaults to an empty dict.
            comp_params (Dict, optional): Computational parameters. Defaults to an empty dict.
            operation_space (str, optional): Specifies the operation mode. Can be 'kmer' or 'sequence'.
                Defaults to 'sequence'.
            **kwargs: Forwarded to ``PreTrainedTokenizer.__init__``.
        """
        super().__init__(cls_token=ProkBERTTokenizer.default_cls_token,
                         **kwargs)

        # None replaces the former mutable ``{}`` defaults, which were shared
        # between all calls of the constructor.
        tokenization_params = {} if tokenization_params is None else tokenization_params
        segmentation_params = {} if segmentation_params is None else segmentation_params
        comp_params = {} if comp_params is None else comp_params

        self.defconfig = SeqConfig()
        self.tokenization_params = self.defconfig.get_and_set_tokenization_parameters(tokenization_params)
        self.segmentation_params = self.defconfig.get_and_set_segmentation_parameters(segmentation_params)
        self.comp_params = self.defconfig.get_and_set_computational_parameters(comp_params)
        self.operation_space = operation_space

        self.vocab = self.tokenization_params['vocabmap']
        self.id2token = {v: k for k, v in self.vocab.items()}
        self.max_len = self.tokenization_params['max_segment_length']

        if self.operation_space == 'sequence':
            kmer = self.tokenization_params['kmer']
            # k-mers that contain at least one '*' character.
            token_extension = sorted(
                set(generate_kmers(ProkBERTTokenizer.extended_nucleotide_abc, kmer))
                - set(generate_kmers(ProkBERTTokenizer.nucleotide_abc, kmer))
            )
            self.extended_vocab = deepcopy(self.vocab)
            for token in token_extension:
                # 4 is the id of [MASK] in the bundled vocab files.
                self.extended_vocab[token] = 4

            self.unk_token = ProkBERTTokenizer.sequence_unk_token * self.tokenization_params['shift']
            self.mask_token = '*'
            self.extended_vocab[self.mask_token] = self.vocab['[MASK]']

            # A k-mer made only of 'N' shares id 1 ([UNK] in the bundled
            # vocab files) and becomes the token that id 1 decodes to.
            # NOTE(review): self.vocab is the very dict stored under
            # tokenization_params['vocabmap'], so this also changes that dict.
            full_unk = 'N' * kmer
            self.vocab[full_unk] = 1
            self.id2token[1] = full_unk
            self.full_unk_token = full_unk
        else:
            self.extended_vocab = self.vocab
            self.unk_token = '[UNK]'

        # Set in both modes; in sequence mode this replaces the '*' mask
        # token assigned above.
        self.sep_token = '[SEP]'
        self.cls_token = '[CLS]'
        self.pad_token = '[PAD]'
        self.mask_token = '[MASK]'
        self.special_tokens = list(self.special_tokens_map.values())

    def __len__(self) -> int:
        """Return the number of entries in the vocabulary."""
        return len(self.vocab)

    def _unknown_token_id(self) -> Optional[int]:
        """Return the id used for tokens that are missing from the vocabulary.

        In sequence space ``self.unk_token`` is ``'N' * shift``, which is not
        necessarily a vocabulary entry, so the full-length unknown k-mer
        registered in ``__init__`` is used instead.
        """
        if self.operation_space == 'sequence':
            return self.vocab[self.full_unk_token]
        return self.vocab.get(self.unk_token)

    def _validate_lca_shift(self, lca_shift: int) -> None:
        """Raise ValueError if ``lca_shift`` is not smaller than the configured shift."""
        shift = self.tokenization_params['shift']
        if lca_shift >= shift:
            raise ValueError(f'The required offset {lca_shift} is invalid. The maximum offset should be < {shift}')

    def tokenize(self, text: str, lca_shift: int = 0, all: bool = False) -> Union[List[str], Tuple[List[List[str]], List[List[str]]]]:
        """
        Tokenizes a given segment.

        Args:
            text (str): The DNA segment to tokenize.
            lca_shift (int, optional): Which tokenized vector belonging to the specified LCA offset should be returned. Defaults to 0.
            all (bool, optional): If True, returns all possible tokenizations. Defaults to False.

        Returns:
            Union[List[str], Tuple[List[List[str]], List[List[str]]]]: The k-mers of the
            selected offset, or the tuple ``(tokenized_segments, kmerized_segments)``
            covering every offset when ``all`` is True.

        Usage Example:
            >>> tokenizer = ProkBERTTokenizer(...)
            >>> segment = 'AATCAAGGAATTATTATCGTT'
            >>> tokens, kmers = tokenizer.tokenize(segment, all=True)
            >>> print(tokens)
            ...
        """
        tokenized_segments, kmerized_segments = lca_tokenize_segment(text, self.tokenization_params)
        if all:
            return tokenized_segments, kmerized_segments
        return kmerized_segments[lca_shift]

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.vocab.get(token, self._unknown_token_id())

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        # The reverse mapping built in __init__ is ``id2token``.
        return self.id2token.get(index, self.unk_token)

    def depr_convert_ids_to_tokens(self, ids: Union[int, str, List[str]]) -> Union[int, List[int]]:
        """
        Deprecated helper that converts tokens to their corresponding IDs.

        Despite its name and the name of its parameter, this method looks
        the given values up in the token -> id vocabulary.

        Args:
            ids (Union[int, str, List[str]]): A single token or a list of tokens to convert.

        Returns:
            Union[int, List[int]]: The id of the single token, or the list of
            corresponding token IDs. Unknown tokens map to the unknown-token id.

        Usage Example:
            >>> tokenizer = ProkBERTTokenizer(...)
            >>> tokens = ['AATCAA', 'TCAAGG']
            >>> ids = tokenizer.depr_convert_ids_to_tokens(tokens)
            >>> print(ids)
            ...
        """
        fallback_id = self._unknown_token_id()
        if isinstance(ids, (int, str)):
            return self.vocab.get(ids, fallback_id)
        return [self.vocab.get(token, fallback_id) for token in ids]

    def convert_ids_to_tokens(self, ids: Union[int, List[int]]) -> Union[str, List[str]]:
        """
        Converts token IDs back to their original tokens.

        Args:
            ids (Union[int, List[int]]): A single token ID or a list of token IDs to convert.

        Returns:
            Union[str, List[str]]: A single token (str) when exactly one id is
            given, otherwise a list of tokens. In sequence space a leading
            [CLS] id (2) is skipped, the second id is decoded to its full
            k-mer, and every later id contributes only its last ``shift``
            characters; special tokens among the later ids are skipped.

        Usage Example:
            >>> tokenizer = ProkBERTTokenizer(...)
            >>> ids = [213, 3343]
            >>> tokens = tokenizer.convert_ids_to_tokens(ids)
            >>> print(tokens)
            ...
        """
        if isinstance(ids, int):
            ids = [ids]
        if len(ids) == 0:
            return []
        if len(ids) == 1:
            return self.id2token.get(ids[0], self.unk_token)

        # __init__ treats every operation space other than 'sequence' as
        # k-mer space; do the same here.
        if self.operation_space != 'sequence':
            return [self.id2token.get(token_id, self.unk_token) for token_id in ids]

        shift = self.tokenization_params['shift']
        token_list = []
        # Handling the sentence start: 2 is the id of [CLS] in the bundled vocab files.
        if ids[0] != 2:
            token_list.append(self.id2token.get(ids[0], self.unk_token))
        # The first real token is kept as a whole k-mer.
        token_list.append(self.id2token.get(ids[1], self.unk_token))
        # Every further k-mer only adds its last `shift` characters.
        for token_id in ids[2:]:
            mapped_token = self.id2token.get(token_id, self.unk_token)
            if mapped_token not in self.special_tokens:
                token_list.append(mapped_token[-shift:])

        return token_list

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """Saves the vocabulary to a file, one token per line.

        Args:
            save_directory (str): Directory in which the file is created.
            filename_prefix (str, optional): Text put directly in front of
                the ``vocab.txt`` file name.

        Returns:
            Tuple[str]: One-element tuple holding the path of the written file.
        """
        if filename_prefix is None:
            filename_prefix = ""
        vocab_file_path = os.path.join(save_directory, filename_prefix + "vocab.txt")
        # NOTE(review): tokens are written in dict order; in sequence mode the
        # all-'N' k-mer added in __init__ is therefore written as an extra
        # last line although it shares id 1 -- confirm this is intended.
        with open(vocab_file_path, "w", encoding="utf-8") as f:
            for token in self.vocab:
                # A real newline: the previous "\\n" wrote a literal
                # backslash-n and produced a single-line vocabulary file.
                f.write(token + "\n")
        return (vocab_file_path,)

    @classmethod
    def from_pretrained(cls, vocab_file: str) -> 'ProkBERTTokenizer':
        """Loads a pre-trained tokenizer.

        Args:
            vocab_file (str): Path to the pre-trained tokenizer vocabulary file.

        Returns:
            ProkBERTTokenizer: Loaded tokenizer instance.
        """
        # The first constructor argument is the tokenization parameter dict,
        # so the path has to be passed as one of its entries.
        # NOTE(review): assumes SeqConfig accepts 'vocabfile' as an input
        # key (it is present in the resolved parameters) -- confirm.
        return cls(tokenization_params={'vocabfile': vocab_file})

    def encode_plus(self, text: str, lca_shift: int = 0, **kwargs) -> Dict[str, np.ndarray]:
        """
        Tokenizes a sequence and returns it in a format suitable for model input.

        Args:
            text (str): The sequence to tokenize.
            lca_shift (int, optional): LCA offset for tokenization. Defaults to 0.

        Returns:
            Dict[str, np.ndarray]: Dictionary containing token IDs and attention masks,
            both padded with zeros up to ``max_segment_length``.

        Raises:
            ValueError: If ``lca_shift`` is not smaller than the configured shift.

        Usage Example:
            >>> tokenizer = ProkBERTTokenizer(...)
            >>> segment = 'AATCAAGGAATTATTATCGTT'
            >>> encoded = tokenizer.encode_plus(segment)
            >>> print(encoded)
            ...
        """
        self._validate_lca_shift(lca_shift)
        tokenized_segments, _ = lca_tokenize_segment(text, self.tokenization_params)
        input_ids = list(tokenized_segments[lca_shift])
        attention_mask = [1] * len(input_ids)

        # Padding (id 0, [PAD] in the bundled vocab files); longer inputs are left untouched.
        pad_length = max(0, self.max_len - len(input_ids))
        input_ids.extend([0] * pad_length)
        attention_mask.extend([0] * pad_length)

        token_dtype = self.comp_params['np_tokentype']
        return {
            "input_ids": np.array(input_ids, dtype=token_dtype),
            "attention_mask": np.array(attention_mask, dtype=token_dtype)
        }

    def batch_encode_plus(self, sequences: List[str], lca_shift: int = 0, all: bool = False, **kwargs) -> Dict[str, List[List[int]]]:
        """
        Tokenizes multiple sequences and returns them in a format suitable for model input. It is assumed that sequences
        have already been preprocessed (i.e., segmented) and quality controlled.

        Args:
            - sequences (List[str]): A list of DNA sequences to be tokenized.
            - lca_shift (int, default=0): The LCA offset or windows to get the tokenized vector. If the required offset is >= shift,
                an error is raised.
            - all (bool, default=False): Whether all possible tokenization vectors should be returned. If False, only the specified
                offset is used.
            - **kwargs: Additional arguments (like max_length, padding, etc.); currently not used.

        Returns:
            - Dict[str, List[List[int]]]: A dictionary containing token IDs, attention masks, and token type IDs.

        Raises:
            ValueError: If ``lca_shift`` is not smaller than the configured shift.
        """
        self._validate_lca_shift(lca_shift)

        # Parallel tokenization. First, create unique IDs for all sequences.
        sequence_ids = list(range(len(sequences)))
        to_tokenize_data = (sequences, sequence_ids)

        # Tokenize each sequence
        tokenization_results = batch_tokenize_segments_with_ids(
            to_tokenize_data,
            self.tokenization_params,
            self.comp_params['cpu_cores_for_tokenization'],
            self.comp_params['batch_size_tokenization'],
            self.comp_params['np_tokentype']
        )

        # Generate input ids, token type ids, and attention masks
        input_ids = []
        token_type_ids = []
        attention_masks = []

        for tokenized_vectors in tokenization_results.values():
            # Either every offset of the sequence or only the requested one.
            selected_vectors = tokenized_vectors if all else [tokenized_vectors[lca_shift]]
            for tokenized_vector in selected_vectors:
                input_ids.append(tokenized_vector)
                token_type_ids.append([0] * len(tokenized_vector))
                attention_masks.append([1] * len(tokenized_vector))

        return {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "attention_mask": attention_masks
        }

    def encode(self, segment: str, lca_shift: int = 0, all: bool = False, add_special_tokens: bool = True, **kwargs) -> List[int]:
        """
        Encode a DNA sequence into its corresponding token IDs.

        Args:
            segment (str): The DNA segment to encode.
            lca_shift (int, optional): LCA offset whose id vector is returned. Defaults to 0.
            all (bool, optional): If True, the id vectors of all offsets are returned as a list. Defaults to False.
            add_special_tokens (bool, optional): Whether to keep the first and last id of each
                vector (special tokens like [CLS] and [SEP]). Defaults to True.

        Returns:
            List[int]: Encoded token IDs (a list of such lists when ``all`` is True).

        Raises:
            ValueError: If ``lca_shift`` is not smaller than the configured shift.

        Usage Example:
            >>> tokenizer = ProkBERTTokenizer(...)
            >>> segment = 'AATCAAGGAATTATTATCGTT'
            >>> ids = tokenizer.encode(segment)
            >>> print(ids)
            ...
        """
        self._validate_lca_shift(lca_shift)

        tokenized_segments, _ = lca_tokenize_segment(segment, self.tokenization_params)

        # if all is set to True, then we return all the possible ids as a list
        if all:
            if add_special_tokens:
                return tokenized_segments
            return [token_id_set[1:-1] for token_id_set in tokenized_segments]

        token_ids = tokenized_segments[lca_shift]
        if not add_special_tokens:
            # Drop the first and the last id.
            token_ids = token_ids[1:-1]
        return token_ids

    def decode(self, ids):
        """Convert token ids to tokens and concatenate them into one string."""
        tokens = self.convert_ids_to_tokens(ids)
        return ''.join(tokens)

    def batch_decode(self, token_ids_list: List[List[int]], **kwargs) -> List[str]:
        """
        Decodes multiple token ID sequences back into their original sequences.

        Args:
            token_ids_list (List[List[int]]): List of token ID sequences.

        Returns:
            List[str]: List of decoded sequences.

        Usage Example:
            >>> tokenizer = ProkBERTTokenizer(...)
            >>> ids = [[2, 213, 3343, 165, 2580, 248, 3905, 978, 3296, 3]]
            >>> sequences = tokenizer.batch_decode(ids)
            >>> print(sequences)
            ...
        """
        return [self.decode(token_ids) for token_ids in token_ids_list]
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "N"
|
7 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"clean_up_tokenization_spaces": true,
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"model_max_length": 1000000000000000019884624838656,
|
5 |
+
"tokenizer_class": "ProkBERTTokenizer"
|
6 |
+
}
|
vocab.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[PAD]\n[UNK]\n[CLS]\n[SEP]\n[MASK]\nAAAAAA\nAAAAAC\nAAAAAG\nAAAAAT\nAAAACA\nAAAACC\nAAAACG\nAAAACT\nAAAAGA\nAAAAGC\nAAAAGG\nAAAAGT\nAAAATA\nAAAATC\nAAAATG\nAAAATT\nAAACAA\nAAACAC\nAAACAG\nAAACAT\nAAACCA\nAAACCC\nAAACCG\nAAACCT\nAAACGA\nAAACGC\nAAACGG\nAAACGT\nAAACTA\nAAACTC\nAAACTG\nAAACTT\nAAAGAA\nAAAGAC\nAAAGAG\nAAAGAT\nAAAGCA\nAAAGCC\nAAAGCG\nAAAGCT\nAAAGGA\nAAAGGC\nAAAGGG\nAAAGGT\nAAAGTA\nAAAGTC\nAAAGTG\nAAAGTT\nAAATAA\nAAATAC\nAAATAG\nAAATAT\nAAATCA\nAAATCC\nAAATCG\nAAATCT\nAAATGA\nAAATGC\nAAATGG\nAAATGT\nAAATTA\nAAATTC\nAAATTG\nAAATTT\nAACAAA\nAACAAC\nAACAAG\nAACAAT\nAACACA\nAACACC\nAACACG\nAACACT\nAACAGA\nAACAGC\nAACAGG\nAACAGT\nAACATA\nAACATC\nAACATG\nAACATT\nAACCAA\nAACCAC\nAACCAG\nAACCAT\nAACCCA\nAACCCC\nAACCCG\nAACCCT\nAACCGA\nAACCGC\nAACCGG\nAACCGT\nAACCTA\nAACCTC\nAACCTG\nAACCTT\nAACGAA\nAACGAC\nAACGAG\nAACGAT\nAACGCA\nAACGCC\nAACGCG\nAACGCT\nAACGGA\nAACGGC\nAACGGG\nAACGGT\nAACGTA\nAACGTC\nAACGTG\nAACGTT\nAACTAA\nAACTAC\nAACTAG\nAACTAT\nAACTCA\nAACTCC\nAACTCG\nAACTCT\nAACTGA\nAACTGC\nAACTGG\nAACTGT\nAACTTA\nAACTTC\nAACTTG\nAACTTT\nAAGAAA\nAAGAAC\nAAGAAG\nAAGAAT\nAAGACA\nAAGACC\nAAGACG\nAAGACT\nAAGAGA\nAAGAGC\nAAGAGG\nAAGAGT\nAAGATA\nAAGATC\nAAGATG\nAAGATT\nAAGCAA\nAAGCAC\nAAGCAG\nAAGCAT\nAAGCCA\nAAGCCC\nAAGCCG\nAAGCCT\nAAGCGA\nAAGCGC\nAAGCGG\nAAGCGT\nAAGCTA\nAAGCTC\nAAGCTG\nAAGCTT\nAAGGAA\nAAGGAC\nAAGGAG\nAAGGAT\nAAGGCA\nAAGGCC\nAAGGCG\nAAGGCT\nAAGGGA\nAAGGGC\nAAGGGG\nAAGGGT\nAAGGTA\nAAGGTC\nAAGGTG\nAAGGTT\nAAGTAA\nAAGTAC\nAAGTAG\nAAGTAT\nAAGTCA\nAAGTCC\nAAGTCG\nAAGTCT\nAAGTGA\nAAGTGC\nAAGTGG\nAAGTGT\nAAGTTA\nAAGTTC\nAAGTTG\nAAGTTT\nAATAAA\nAATAAC\nAATAAG\nAATAAT\nAATACA\nAATACC\nAATACG\nAATACT\nAATAGA\nAATAGC\nAATAGG\nAATAGT\nAATATA\nAATATC\nAATATG\nAATATT\nAATCAA\nAATCAC\nAATCAG\nAATCAT\nAATCCA\nAATCCC\nAATCCG\nAATCCT\nAATCGA\nAATCGC\nAATCGG\nAATCGT\nAATCTA\nAATCTC\nAATCTG\nAATCTT\nAATGAA\nAATGAC\nAATGAG\nAATGAT\nAATGCA\nAATGCC\nAATGCG\nAATGCT\nAATGGA\nAATGGC\nAATGGG\nAATGGT\nAATGTA\nAATGTC\nAATGTG\nAATGTT\nAATTAA\nAATTAC\nAATTAG\nAATTAT\nAATTCA\nAATT
CC\nAATTCG\nAATTCT\nAATTGA\nAATTGC\nAATTGG\nAATTGT\nAATTTA\nAATTTC\nAATTTG\nAATTTT\nACAAAA\nACAAAC\nACAAAG\nACAAAT\nACAACA\nACAACC\nACAACG\nACAACT\nACAAGA\nACAAGC\nACAAGG\nACAAGT\nACAATA\nACAATC\nACAATG\nACAATT\nACACAA\nACACAC\nACACAG\nACACAT\nACACCA\nACACCC\nACACCG\nACACCT\nACACGA\nACACGC\nACACGG\nACACGT\nACACTA\nACACTC\nACACTG\nACACTT\nACAGAA\nACAGAC\nACAGAG\nACAGAT\nACAGCA\nACAGCC\nACAGCG\nACAGCT\nACAGGA\nACAGGC\nACAGGG\nACAGGT\nACAGTA\nACAGTC\nACAGTG\nACAGTT\nACATAA\nACATAC\nACATAG\nACATAT\nACATCA\nACATCC\nACATCG\nACATCT\nACATGA\nACATGC\nACATGG\nACATGT\nACATTA\nACATTC\nACATTG\nACATTT\nACCAAA\nACCAAC\nACCAAG\nACCAAT\nACCACA\nACCACC\nACCACG\nACCACT\nACCAGA\nACCAGC\nACCAGG\nACCAGT\nACCATA\nACCATC\nACCATG\nACCATT\nACCCAA\nACCCAC\nACCCAG\nACCCAT\nACCCCA\nACCCCC\nACCCCG\nACCCCT\nACCCGA\nACCCGC\nACCCGG\nACCCGT\nACCCTA\nACCCTC\nACCCTG\nACCCTT\nACCGAA\nACCGAC\nACCGAG\nACCGAT\nACCGCA\nACCGCC\nACCGCG\nACCGCT\nACCGGA\nACCGGC\nACCGGG\nACCGGT\nACCGTA\nACCGTC\nACCGTG\nACCGTT\nACCTAA\nACCTAC\nACCTAG\nACCTAT\nACCTCA\nACCTCC\nACCTCG\nACCTCT\nACCTGA\nACCTGC\nACCTGG\nACCTGT\nACCTTA\nACCTTC\nACCTTG\nACCTTT\nACGAAA\nACGAAC\nACGAAG\nACGAAT\nACGACA\nACGACC\nACGACG\nACGACT\nACGAGA\nACGAGC\nACGAGG\nACGAGT\nACGATA\nACGATC\nACGATG\nACGATT\nACGCAA\nACGCAC\nACGCAG\nACGCAT\nACGCCA\nACGCCC\nACGCCG\nACGCCT\nACGCGA\nACGCGC\nACGCGG\nACGCGT\nACGCTA\nACGCTC\nACGCTG\nACGCTT\nACGGAA\nACGGAC\nACGGAG\nACGGAT\nACGGCA\nACGGCC\nACGGCG\nACGGCT\nACGGGA\nACGGGC\nACGGGG\nACGGGT\nACGGTA\nACGGTC\nACGGTG\nACGGTT\nACGTAA\nACGTAC\nACGTAG\nACGTAT\nACGTCA\nACGTCC\nACGTCG\nACGTCT\nACGTGA\nACGTGC\nACGTGG\nACGTGT\nACGTTA\nACGTTC\nACGTTG\nACGTTT\nACTAAA\nACTAAC\nACTAAG\nACTAAT\nACTACA\nACTACC\nACTACG\nACTACT\nACTAGA\nACTAGC\nACTAGG\nACTAGT\nACTATA\nACTATC\nACTATG\nACTATT\nACTCAA\nACTCAC\nACTCAG\nACTCAT\nACTCCA\nACTCCC\nACTCCG\nACTCCT\nACTCGA\nACTCGC\nACTCGG\nACTCGT\nACTCTA\nACTCTC\nACTCTG\nACTCTT\nACTGAA\nACTGAC\nACTGAG\nACTGAT\nACTGCA\nACTGCC\nACTGCG\nACTGCT\nACTGGA\nACTGGC\nACTGGG\nACTGGT\nACTGTA\nACTGTC\nACTGTG\nACTG
TT\nACTTAA\nACTTAC\nACTTAG\nACTTAT\nACTTCA\nACTTCC\nACTTCG\nACTTCT\nACTTGA\nACTTGC\nACTTGG\nACTTGT\nACTTTA\nACTTTC\nACTTTG\nACTTTT\nAGAAAA\nAGAAAC\nAGAAAG\nAGAAAT\nAGAACA\nAGAACC\nAGAACG\nAGAACT\nAGAAGA\nAGAAGC\nAGAAGG\nAGAAGT\nAGAATA\nAGAATC\nAGAATG\nAGAATT\nAGACAA\nAGACAC\nAGACAG\nAGACAT\nAGACCA\nAGACCC\nAGACCG\nAGACCT\nAGACGA\nAGACGC\nAGACGG\nAGACGT\nAGACTA\nAGACTC\nAGACTG\nAGACTT\nAGAGAA\nAGAGAC\nAGAGAG\nAGAGAT\nAGAGCA\nAGAGCC\nAGAGCG\nAGAGCT\nAGAGGA\nAGAGGC\nAGAGGG\nAGAGGT\nAGAGTA\nAGAGTC\nAGAGTG\nAGAGTT\nAGATAA\nAGATAC\nAGATAG\nAGATAT\nAGATCA\nAGATCC\nAGATCG\nAGATCT\nAGATGA\nAGATGC\nAGATGG\nAGATGT\nAGATTA\nAGATTC\nAGATTG\nAGATTT\nAGCAAA\nAGCAAC\nAGCAAG\nAGCAAT\nAGCACA\nAGCACC\nAGCACG\nAGCACT\nAGCAGA\nAGCAGC\nAGCAGG\nAGCAGT\nAGCATA\nAGCATC\nAGCATG\nAGCATT\nAGCCAA\nAGCCAC\nAGCCAG\nAGCCAT\nAGCCCA\nAGCCCC\nAGCCCG\nAGCCCT\nAGCCGA\nAGCCGC\nAGCCGG\nAGCCGT\nAGCCTA\nAGCCTC\nAGCCTG\nAGCCTT\nAGCGAA\nAGCGAC\nAGCGAG\nAGCGAT\nAGCGCA\nAGCGCC\nAGCGCG\nAGCGCT\nAGCGGA\nAGCGGC\nAGCGGG\nAGCGGT\nAGCGTA\nAGCGTC\nAGCGTG\nAGCGTT\nAGCTAA\nAGCTAC\nAGCTAG\nAGCTAT\nAGCTCA\nAGCTCC\nAGCTCG\nAGCTCT\nAGCTGA\nAGCTGC\nAGCTGG\nAGCTGT\nAGCTTA\nAGCTTC\nAGCTTG\nAGCTTT\nAGGAAA\nAGGAAC\nAGGAAG\nAGGAAT\nAGGACA\nAGGACC\nAGGACG\nAGGACT\nAGGAGA\nAGGAGC\nAGGAGG\nAGGAGT\nAGGATA\nAGGATC\nAGGATG\nAGGATT\nAGGCAA\nAGGCAC\nAGGCAG\nAGGCAT\nAGGCCA\nAGGCCC\nAGGCCG\nAGGCCT\nAGGCGA\nAGGCGC\nAGGCGG\nAGGCGT\nAGGCTA\nAGGCTC\nAGGCTG\nAGGCTT\nAGGGAA\nAGGGAC\nAGGGAG\nAGGGAT\nAGGGCA\nAGGGCC\nAGGGCG\nAGGGCT\nAGGGGA\nAGGGGC\nAGGGGG\nAGGGGT\nAGGGTA\nAGGGTC\nAGGGTG\nAGGGTT\nAGGTAA\nAGGTAC\nAGGTAG\nAGGTAT\nAGGTCA\nAGGTCC\nAGGTCG\nAGGTCT\nAGGTGA\nAGGTGC\nAGGTGG\nAGGTGT\nAGGTTA\nAGGTTC\nAGGTTG\nAGGTTT\nAGTAAA\nAGTAAC\nAGTAAG\nAGTAAT\nAGTACA\nAGTACC\nAGTACG\nAGTACT\nAGTAGA\nAGTAGC\nAGTAGG\nAGTAGT\nAGTATA\nAGTATC\nAGTATG\nAGTATT\nAGTCAA\nAGTCAC\nAGTCAG\nAGTCAT\nAGTCCA\nAGTCCC\nAGTCCG\nAGTCCT\nAGTCGA\nAGTCGC\nAGTCGG\nAGTCGT\nAGTCTA\nAGTCTC\nAGTCTG\nAGTCTT\nAGTGAA\nAGTGAC\nAGTGAG\nAGTGAT\nAGTGCA\nAGTGCC\nAGTGCG\nAGTGCT\nAGTGGA\nAGTG
GC\nAGTGGG\nAGTGGT\nAGTGTA\nAGTGTC\nAGTGTG\nAGTGTT\nAGTTAA\nAGTTAC\nAGTTAG\nAGTTAT\nAGTTCA\nAGTTCC\nAGTTCG\nAGTTCT\nAGTTGA\nAGTTGC\nAGTTGG\nAGTTGT\nAGTTTA\nAGTTTC\nAGTTTG\nAGTTTT\nATAAAA\nATAAAC\nATAAAG\nATAAAT\nATAACA\nATAACC\nATAACG\nATAACT\nATAAGA\nATAAGC\nATAAGG\nATAAGT\nATAATA\nATAATC\nATAATG\nATAATT\nATACAA\nATACAC\nATACAG\nATACAT\nATACCA\nATACCC\nATACCG\nATACCT\nATACGA\nATACGC\nATACGG\nATACGT\nATACTA\nATACTC\nATACTG\nATACTT\nATAGAA\nATAGAC\nATAGAG\nATAGAT\nATAGCA\nATAGCC\nATAGCG\nATAGCT\nATAGGA\nATAGGC\nATAGGG\nATAGGT\nATAGTA\nATAGTC\nATAGTG\nATAGTT\nATATAA\nATATAC\nATATAG\nATATAT\nATATCA\nATATCC\nATATCG\nATATCT\nATATGA\nATATGC\nATATGG\nATATGT\nATATTA\nATATTC\nATATTG\nATATTT\nATCAAA\nATCAAC\nATCAAG\nATCAAT\nATCACA\nATCACC\nATCACG\nATCACT\nATCAGA\nATCAGC\nATCAGG\nATCAGT\nATCATA\nATCATC\nATCATG\nATCATT\nATCCAA\nATCCAC\nATCCAG\nATCCAT\nATCCCA\nATCCCC\nATCCCG\nATCCCT\nATCCGA\nATCCGC\nATCCGG\nATCCGT\nATCCTA\nATCCTC\nATCCTG\nATCCTT\nATCGAA\nATCGAC\nATCGAG\nATCGAT\nATCGCA\nATCGCC\nATCGCG\nATCGCT\nATCGGA\nATCGGC\nATCGGG\nATCGGT\nATCGTA\nATCGTC\nATCGTG\nATCGTT\nATCTAA\nATCTAC\nATCTAG\nATCTAT\nATCTCA\nATCTCC\nATCTCG\nATCTCT\nATCTGA\nATCTGC\nATCTGG\nATCTGT\nATCTTA\nATCTTC\nATCTTG\nATCTTT\nATGAAA\nATGAAC\nATGAAG\nATGAAT\nATGACA\nATGACC\nATGACG\nATGACT\nATGAGA\nATGAGC\nATGAGG\nATGAGT\nATGATA\nATGATC\nATGATG\nATGATT\nATGCAA\nATGCAC\nATGCAG\nATGCAT\nATGCCA\nATGCCC\nATGCCG\nATGCCT\nATGCGA\nATGCGC\nATGCGG\nATGCGT\nATGCTA\nATGCTC\nATGCTG\nATGCTT\nATGGAA\nATGGAC\nATGGAG\nATGGAT\nATGGCA\nATGGCC\nATGGCG\nATGGCT\nATGGGA\nATGGGC\nATGGGG\nATGGGT\nATGGTA\nATGGTC\nATGGTG\nATGGTT\nATGTAA\nATGTAC\nATGTAG\nATGTAT\nATGTCA\nATGTCC\nATGTCG\nATGTCT\nATGTGA\nATGTGC\nATGTGG\nATGTGT\nATGTTA\nATGTTC\nATGTTG\nATGTTT\nATTAAA\nATTAAC\nATTAAG\nATTAAT\nATTACA\nATTACC\nATTACG\nATTACT\nATTAGA\nATTAGC\nATTAGG\nATTAGT\nATTATA\nATTATC\nATTATG\nATTATT\nATTCAA\nATTCAC\nATTCAG\nATTCAT\nATTCCA\nATTCCC\nATTCCG\nATTCCT\nATTCGA\nATTCGC\nATTCGG\nATTCGT\nATTCTA\nATTCTC\nATTCTG\nATTCTT\nATTGAA\nATTGAC\nATTGAG\nATTG
AT\nATTGCA\nATTGCC\nATTGCG\nATTGCT\nATTGGA\nATTGGC\nATTGGG\nATTGGT\nATTGTA\nATTGTC\nATTGTG\nATTGTT\nATTTAA\nATTTAC\nATTTAG\nATTTAT\nATTTCA\nATTTCC\nATTTCG\nATTTCT\nATTTGA\nATTTGC\nATTTGG\nATTTGT\nATTTTA\nATTTTC\nATTTTG\nATTTTT\nCAAAAA\nCAAAAC\nCAAAAG\nCAAAAT\nCAAACA\nCAAACC\nCAAACG\nCAAACT\nCAAAGA\nCAAAGC\nCAAAGG\nCAAAGT\nCAAATA\nCAAATC\nCAAATG\nCAAATT\nCAACAA\nCAACAC\nCAACAG\nCAACAT\nCAACCA\nCAACCC\nCAACCG\nCAACCT\nCAACGA\nCAACGC\nCAACGG\nCAACGT\nCAACTA\nCAACTC\nCAACTG\nCAACTT\nCAAGAA\nCAAGAC\nCAAGAG\nCAAGAT\nCAAGCA\nCAAGCC\nCAAGCG\nCAAGCT\nCAAGGA\nCAAGGC\nCAAGGG\nCAAGGT\nCAAGTA\nCAAGTC\nCAAGTG\nCAAGTT\nCAATAA\nCAATAC\nCAATAG\nCAATAT\nCAATCA\nCAATCC\nCAATCG\nCAATCT\nCAATGA\nCAATGC\nCAATGG\nCAATGT\nCAATTA\nCAATTC\nCAATTG\nCAATTT\nCACAAA\nCACAAC\nCACAAG\nCACAAT\nCACACA\nCACACC\nCACACG\nCACACT\nCACAGA\nCACAGC\nCACAGG\nCACAGT\nCACATA\nCACATC\nCACATG\nCACATT\nCACCAA\nCACCAC\nCACCAG\nCACCAT\nCACCCA\nCACCCC\nCACCCG\nCACCCT\nCACCGA\nCACCGC\nCACCGG\nCACCGT\nCACCTA\nCACCTC\nCACCTG\nCACCTT\nCACGAA\nCACGAC\nCACGAG\nCACGAT\nCACGCA\nCACGCC\nCACGCG\nCACGCT\nCACGGA\nCACGGC\nCACGGG\nCACGGT\nCACGTA\nCACGTC\nCACGTG\nCACGTT\nCACTAA\nCACTAC\nCACTAG\nCACTAT\nCACTCA\nCACTCC\nCACTCG\nCACTCT\nCACTGA\nCACTGC\nCACTGG\nCACTGT\nCACTTA\nCACTTC\nCACTTG\nCACTTT\nCAGAAA\nCAGAAC\nCAGAAG\nCAGAAT\nCAGACA\nCAGACC\nCAGACG\nCAGACT\nCAGAGA\nCAGAGC\nCAGAGG\nCAGAGT\nCAGATA\nCAGATC\nCAGATG\nCAGATT\nCAGCAA\nCAGCAC\nCAGCAG\nCAGCAT\nCAGCCA\nCAGCCC\nCAGCCG\nCAGCCT\nCAGCGA\nCAGCGC\nCAGCGG\nCAGCGT\nCAGCTA\nCAGCTC\nCAGCTG\nCAGCTT\nCAGGAA\nCAGGAC\nCAGGAG\nCAGGAT\nCAGGCA\nCAGGCC\nCAGGCG\nCAGGCT\nCAGGGA\nCAGGGC\nCAGGGG\nCAGGGT\nCAGGTA\nCAGGTC\nCAGGTG\nCAGGTT\nCAGTAA\nCAGTAC\nCAGTAG\nCAGTAT\nCAGTCA\nCAGTCC\nCAGTCG\nCAGTCT\nCAGTGA\nCAGTGC\nCAGTGG\nCAGTGT\nCAGTTA\nCAGTTC\nCAGTTG\nCAGTTT\nCATAAA\nCATAAC\nCATAAG\nCATAAT\nCATACA\nCATACC\nCATACG\nCATACT\nCATAGA\nCATAGC\nCATAGG\nCATAGT\nCATATA\nCATATC\nCATATG\nCATATT\nCATCAA\nCATCAC\nCATCAG\nCATCAT\nCATCCA\nCATCCC\nCATCCG\nCATCCT\nCATCGA\nCATCGC\nCATCGG\nCATCGT\nCATCTA\nCATC
TC\nCATCTG\nCATCTT\nCATGAA\nCATGAC\nCATGAG\nCATGAT\nCATGCA\nCATGCC\nCATGCG\nCATGCT\nCATGGA\nCATGGC\nCATGGG\nCATGGT\nCATGTA\nCATGTC\nCATGTG\nCATGTT\nCATTAA\nCATTAC\nCATTAG\nCATTAT\nCATTCA\nCATTCC\nCATTCG\nCATTCT\nCATTGA\nCATTGC\nCATTGG\nCATTGT\nCATTTA\nCATTTC\nCATTTG\nCATTTT\nCCAAAA\nCCAAAC\nCCAAAG\nCCAAAT\nCCAACA\nCCAACC\nCCAACG\nCCAACT\nCCAAGA\nCCAAGC\nCCAAGG\nCCAAGT\nCCAATA\nCCAATC\nCCAATG\nCCAATT\nCCACAA\nCCACAC\nCCACAG\nCCACAT\nCCACCA\nCCACCC\nCCACCG\nCCACCT\nCCACGA\nCCACGC\nCCACGG\nCCACGT\nCCACTA\nCCACTC\nCCACTG\nCCACTT\nCCAGAA\nCCAGAC\nCCAGAG\nCCAGAT\nCCAGCA\nCCAGCC\nCCAGCG\nCCAGCT\nCCAGGA\nCCAGGC\nCCAGGG\nCCAGGT\nCCAGTA\nCCAGTC\nCCAGTG\nCCAGTT\nCCATAA\nCCATAC\nCCATAG\nCCATAT\nCCATCA\nCCATCC\nCCATCG\nCCATCT\nCCATGA\nCCATGC\nCCATGG\nCCATGT\nCCATTA\nCCATTC\nCCATTG\nCCATTT\nCCCAAA\nCCCAAC\nCCCAAG\nCCCAAT\nCCCACA\nCCCACC\nCCCACG\nCCCACT\nCCCAGA\nCCCAGC\nCCCAGG\nCCCAGT\nCCCATA\nCCCATC\nCCCATG\nCCCATT\nCCCCAA\nCCCCAC\nCCCCAG\nCCCCAT\nCCCCCA\nCCCCCC\nCCCCCG\nCCCCCT\nCCCCGA\nCCCCGC\nCCCCGG\nCCCCGT\nCCCCTA\nCCCCTC\nCCCCTG\nCCCCTT\nCCCGAA\nCCCGAC\nCCCGAG\nCCCGAT\nCCCGCA\nCCCGCC\nCCCGCG\nCCCGCT\nCCCGGA\nCCCGGC\nCCCGGG\nCCCGGT\nCCCGTA\nCCCGTC\nCCCGTG\nCCCGTT\nCCCTAA\nCCCTAC\nCCCTAG\nCCCTAT\nCCCTCA\nCCCTCC\nCCCTCG\nCCCTCT\nCCCTGA\nCCCTGC\nCCCTGG\nCCCTGT\nCCCTTA\nCCCTTC\nCCCTTG\nCCCTTT\nCCGAAA\nCCGAAC\nCCGAAG\nCCGAAT\nCCGACA\nCCGACC\nCCGACG\nCCGACT\nCCGAGA\nCCGAGC\nCCGAGG\nCCGAGT\nCCGATA\nCCGATC\nCCGATG\nCCGATT\nCCGCAA\nCCGCAC\nCCGCAG\nCCGCAT\nCCGCCA\nCCGCCC\nCCGCCG\nCCGCCT\nCCGCGA\nCCGCGC\nCCGCGG\nCCGCGT\nCCGCTA\nCCGCTC\nCCGCTG\nCCGCTT\nCCGGAA\nCCGGAC\nCCGGAG\nCCGGAT\nCCGGCA\nCCGGCC\nCCGGCG\nCCGGCT\nCCGGGA\nCCGGGC\nCCGGGG\nCCGGGT\nCCGGTA\nCCGGTC\nCCGGTG\nCCGGTT\nCCGTAA\nCCGTAC\nCCGTAG\nCCGTAT\nCCGTCA\nCCGTCC\nCCGTCG\nCCGTCT\nCCGTGA\nCCGTGC\nCCGTGG\nCCGTGT\nCCGTTA\nCCGTTC\nCCGTTG\nCCGTTT\nCCTAAA\nCCTAAC\nCCTAAG\nCCTAAT\nCCTACA\nCCTACC\nCCTACG\nCCTACT\nCCTAGA\nCCTAGC\nCCTAGG\nCCTAGT\nCCTATA\nCCTATC\nCCTATG\nCCTATT\nCCTCAA\nCCTCAC\nCCTCAG\nCCTCAT\nCCTCCA\nCCTCCC\nCCTCCG\nCCTC
CT\nCCTCGA\nCCTCGC\nCCTCGG\nCCTCGT\nCCTCTA\nCCTCTC\nCCTCTG\nCCTCTT\nCCTGAA\nCCTGAC\nCCTGAG\nCCTGAT\nCCTGCA\nCCTGCC\nCCTGCG\nCCTGCT\nCCTGGA\nCCTGGC\nCCTGGG\nCCTGGT\nCCTGTA\nCCTGTC\nCCTGTG\nCCTGTT\nCCTTAA\nCCTTAC\nCCTTAG\nCCTTAT\nCCTTCA\nCCTTCC\nCCTTCG\nCCTTCT\nCCTTGA\nCCTTGC\nCCTTGG\nCCTTGT\nCCTTTA\nCCTTTC\nCCTTTG\nCCTTTT\nCGAAAA\nCGAAAC\nCGAAAG\nCGAAAT\nCGAACA\nCGAACC\nCGAACG\nCGAACT\nCGAAGA\nCGAAGC\nCGAAGG\nCGAAGT\nCGAATA\nCGAATC\nCGAATG\nCGAATT\nCGACAA\nCGACAC\nCGACAG\nCGACAT\nCGACCA\nCGACCC\nCGACCG\nCGACCT\nCGACGA\nCGACGC\nCGACGG\nCGACGT\nCGACTA\nCGACTC\nCGACTG\nCGACTT\nCGAGAA\nCGAGAC\nCGAGAG\nCGAGAT\nCGAGCA\nCGAGCC\nCGAGCG\nCGAGCT\nCGAGGA\nCGAGGC\nCGAGGG\nCGAGGT\nCGAGTA\nCGAGTC\nCGAGTG\nCGAGTT\nCGATAA\nCGATAC\nCGATAG\nCGATAT\nCGATCA\nCGATCC\nCGATCG\nCGATCT\nCGATGA\nCGATGC\nCGATGG\nCGATGT\nCGATTA\nCGATTC\nCGATTG\nCGATTT\nCGCAAA\nCGCAAC\nCGCAAG\nCGCAAT\nCGCACA\nCGCACC\nCGCACG\nCGCACT\nCGCAGA\nCGCAGC\nCGCAGG\nCGCAGT\nCGCATA\nCGCATC\nCGCATG\nCGCATT\nCGCCAA\nCGCCAC\nCGCCAG\nCGCCAT\nCGCCCA\nCGCCCC\nCGCCCG\nCGCCCT\nCGCCGA\nCGCCGC\nCGCCGG\nCGCCGT\nCGCCTA\nCGCCTC\nCGCCTG\nCGCCTT\nCGCGAA\nCGCGAC\nCGCGAG\nCGCGAT\nCGCGCA\nCGCGCC\nCGCGCG\nCGCGCT\nCGCGGA\nCGCGGC\nCGCGGG\nCGCGGT\nCGCGTA\nCGCGTC\nCGCGTG\nCGCGTT\nCGCTAA\nCGCTAC\nCGCTAG\nCGCTAT\nCGCTCA\nCGCTCC\nCGCTCG\nCGCTCT\nCGCTGA\nCGCTGC\nCGCTGG\nCGCTGT\nCGCTTA\nCGCTTC\nCGCTTG\nCGCTTT\nCGGAAA\nCGGAAC\nCGGAAG\nCGGAAT\nCGGACA\nCGGACC\nCGGACG\nCGGACT\nCGGAGA\nCGGAGC\nCGGAGG\nCGGAGT\nCGGATA\nCGGATC\nCGGATG\nCGGATT\nCGGCAA\nCGGCAC\nCGGCAG\nCGGCAT\nCGGCCA\nCGGCCC\nCGGCCG\nCGGCCT\nCGGCGA\nCGGCGC\nCGGCGG\nCGGCGT\nCGGCTA\nCGGCTC\nCGGCTG\nCGGCTT\nCGGGAA\nCGGGAC\nCGGGAG\nCGGGAT\nCGGGCA\nCGGGCC\nCGGGCG\nCGGGCT\nCGGGGA\nCGGGGC\nCGGGGG\nCGGGGT\nCGGGTA\nCGGGTC\nCGGGTG\nCGGGTT\nCGGTAA\nCGGTAC\nCGGTAG\nCGGTAT\nCGGTCA\nCGGTCC\nCGGTCG\nCGGTCT\nCGGTGA\nCGGTGC\nCGGTGG\nCGGTGT\nCGGTTA\nCGGTTC\nCGGTTG\nCGGTTT\nCGTAAA\nCGTAAC\nCGTAAG\nCGTAAT\nCGTACA\nCGTACC\nCGTACG\nCGTACT\nCGTAGA\nCGTAGC\nCGTAGG\nCGTAGT\nCGTATA\nCGTATC\nCGTATG\nCGTATT\nCGTCAA\nCGTC
AC\nCGTCAG\nCGTCAT\nCGTCCA\nCGTCCC\nCGTCCG\nCGTCCT\nCGTCGA\nCGTCGC\nCGTCGG\nCGTCGT\nCGTCTA\nCGTCTC\nCGTCTG\nCGTCTT\nCGTGAA\nCGTGAC\nCGTGAG\nCGTGAT\nCGTGCA\nCGTGCC\nCGTGCG\nCGTGCT\nCGTGGA\nCGTGGC\nCGTGGG\nCGTGGT\nCGTGTA\nCGTGTC\nCGTGTG\nCGTGTT\nCGTTAA\nCGTTAC\nCGTTAG\nCGTTAT\nCGTTCA\nCGTTCC\nCGTTCG\nCGTTCT\nCGTTGA\nCGTTGC\nCGTTGG\nCGTTGT\nCGTTTA\nCGTTTC\nCGTTTG\nCGTTTT\nCTAAAA\nCTAAAC\nCTAAAG\nCTAAAT\nCTAACA\nCTAACC\nCTAACG\nCTAACT\nCTAAGA\nCTAAGC\nCTAAGG\nCTAAGT\nCTAATA\nCTAATC\nCTAATG\nCTAATT\nCTACAA\nCTACAC\nCTACAG\nCTACAT\nCTACCA\nCTACCC\nCTACCG\nCTACCT\nCTACGA\nCTACGC\nCTACGG\nCTACGT\nCTACTA\nCTACTC\nCTACTG\nCTACTT\nCTAGAA\nCTAGAC\nCTAGAG\nCTAGAT\nCTAGCA\nCTAGCC\nCTAGCG\nCTAGCT\nCTAGGA\nCTAGGC\nCTAGGG\nCTAGGT\nCTAGTA\nCTAGTC\nCTAGTG\nCTAGTT\nCTATAA\nCTATAC\nCTATAG\nCTATAT\nCTATCA\nCTATCC\nCTATCG\nCTATCT\nCTATGA\nCTATGC\nCTATGG\nCTATGT\nCTATTA\nCTATTC\nCTATTG\nCTATTT\nCTCAAA\nCTCAAC\nCTCAAG\nCTCAAT\nCTCACA\nCTCACC\nCTCACG\nCTCACT\nCTCAGA\nCTCAGC\nCTCAGG\nCTCAGT\nCTCATA\nCTCATC\nCTCATG\nCTCATT\nCTCCAA\nCTCCAC\nCTCCAG\nCTCCAT\nCTCCCA\nCTCCCC\nCTCCCG\nCTCCCT\nCTCCGA\nCTCCGC\nCTCCGG\nCTCCGT\nCTCCTA\nCTCCTC\nCTCCTG\nCTCCTT\nCTCGAA\nCTCGAC\nCTCGAG\nCTCGAT\nCTCGCA\nCTCGCC\nCTCGCG\nCTCGCT\nCTCGGA\nCTCGGC\nCTCGGG\nCTCGGT\nCTCGTA\nCTCGTC\nCTCGTG\nCTCGTT\nCTCTAA\nCTCTAC\nCTCTAG\nCTCTAT\nCTCTCA\nCTCTCC\nCTCTCG\nCTCTCT\nCTCTGA\nCTCTGC\nCTCTGG\nCTCTGT\nCTCTTA\nCTCTTC\nCTCTTG\nCTCTTT\nCTGAAA\nCTGAAC\nCTGAAG\nCTGAAT\nCTGACA\nCTGACC\nCTGACG\nCTGACT\nCTGAGA\nCTGAGC\nCTGAGG\nCTGAGT\nCTGATA\nCTGATC\nCTGATG\nCTGATT\nCTGCAA\nCTGCAC\nCTGCAG\nCTGCAT\nCTGCCA\nCTGCCC\nCTGCCG\nCTGCCT\nCTGCGA\nCTGCGC\nCTGCGG\nCTGCGT\nCTGCTA\nCTGCTC\nCTGCTG\nCTGCTT\nCTGGAA\nCTGGAC\nCTGGAG\nCTGGAT\nCTGGCA\nCTGGCC\nCTGGCG\nCTGGCT\nCTGGGA\nCTGGGC\nCTGGGG\nCTGGGT\nCTGGTA\nCTGGTC\nCTGGTG\nCTGGTT\nCTGTAA\nCTGTAC\nCTGTAG\nCTGTAT\nCTGTCA\nCTGTCC\nCTGTCG\nCTGTCT\nCTGTGA\nCTGTGC\nCTGTGG\nCTGTGT\nCTGTTA\nCTGTTC\nCTGTTG\nCTGTTT\nCTTAAA\nCTTAAC\nCTTAAG\nCTTAAT\nCTTACA\nCTTACC\nCTTACG\nCTTACT\nCTTAGA\nCTTAGC\nCTTAGG\nCTTA
GT\nCTTATA\nCTTATC\nCTTATG\nCTTATT\nCTTCAA\nCTTCAC\nCTTCAG\nCTTCAT\nCTTCCA\nCTTCCC\nCTTCCG\nCTTCCT\nCTTCGA\nCTTCGC\nCTTCGG\nCTTCGT\nCTTCTA\nCTTCTC\nCTTCTG\nCTTCTT\nCTTGAA\nCTTGAC\nCTTGAG\nCTTGAT\nCTTGCA\nCTTGCC\nCTTGCG\nCTTGCT\nCTTGGA\nCTTGGC\nCTTGGG\nCTTGGT\nCTTGTA\nCTTGTC\nCTTGTG\nCTTGTT\nCTTTAA\nCTTTAC\nCTTTAG\nCTTTAT\nCTTTCA\nCTTTCC\nCTTTCG\nCTTTCT\nCTTTGA\nCTTTGC\nCTTTGG\nCTTTGT\nCTTTTA\nCTTTTC\nCTTTTG\nCTTTTT\nGAAAAA\nGAAAAC\nGAAAAG\nGAAAAT\nGAAACA\nGAAACC\nGAAACG\nGAAACT\nGAAAGA\nGAAAGC\nGAAAGG\nGAAAGT\nGAAATA\nGAAATC\nGAAATG\nGAAATT\nGAACAA\nGAACAC\nGAACAG\nGAACAT\nGAACCA\nGAACCC\nGAACCG\nGAACCT\nGAACGA\nGAACGC\nGAACGG\nGAACGT\nGAACTA\nGAACTC\nGAACTG\nGAACTT\nGAAGAA\nGAAGAC\nGAAGAG\nGAAGAT\nGAAGCA\nGAAGCC\nGAAGCG\nGAAGCT\nGAAGGA\nGAAGGC\nGAAGGG\nGAAGGT\nGAAGTA\nGAAGTC\nGAAGTG\nGAAGTT\nGAATAA\nGAATAC\nGAATAG\nGAATAT\nGAATCA\nGAATCC\nGAATCG\nGAATCT\nGAATGA\nGAATGC\nGAATGG\nGAATGT\nGAATTA\nGAATTC\nGAATTG\nGAATTT\nGACAAA\nGACAAC\nGACAAG\nGACAAT\nGACACA\nGACACC\nGACACG\nGACACT\nGACAGA\nGACAGC\nGACAGG\nGACAGT\nGACATA\nGACATC\nGACATG\nGACATT\nGACCAA\nGACCAC\nGACCAG\nGACCAT\nGACCCA\nGACCCC\nGACCCG\nGACCCT\nGACCGA\nGACCGC\nGACCGG\nGACCGT\nGACCTA\nGACCTC\nGACCTG\nGACCTT\nGACGAA\nGACGAC\nGACGAG\nGACGAT\nGACGCA\nGACGCC\nGACGCG\nGACGCT\nGACGGA\nGACGGC\nGACGGG\nGACGGT\nGACGTA\nGACGTC\nGACGTG\nGACGTT\nGACTAA\nGACTAC\nGACTAG\nGACTAT\nGACTCA\nGACTCC\nGACTCG\nGACTCT\nGACTGA\nGACTGC\nGACTGG\nGACTGT\nGACTTA\nGACTTC\nGACTTG\nGACTTT\nGAGAAA\nGAGAAC\nGAGAAG\nGAGAAT\nGAGACA\nGAGACC\nGAGACG\nGAGACT\nGAGAGA\nGAGAGC\nGAGAGG\nGAGAGT\nGAGATA\nGAGATC\nGAGATG\nGAGATT\nGAGCAA\nGAGCAC\nGAGCAG\nGAGCAT\nGAGCCA\nGAGCCC\nGAGCCG\nGAGCCT\nGAGCGA\nGAGCGC\nGAGCGG\nGAGCGT\nGAGCTA\nGAGCTC\nGAGCTG\nGAGCTT\nGAGGAA\nGAGGAC\nGAGGAG\nGAGGAT\nGAGGCA\nGAGGCC\nGAGGCG\nGAGGCT\nGAGGGA\nGAGGGC\nGAGGGG\nGAGGGT\nGAGGTA\nGAGGTC\nGAGGTG\nGAGGTT\nGAGTAA\nGAGTAC\nGAGTAG\nGAGTAT\nGAGTCA\nGAGTCC\nGAGTCG\nGAGTCT\nGAGTGA\nGAGTGC\nGAGTGG\nGAGTGT\nGAGTTA\nGAGTTC\nGAGTTG\nGAGTTT\nGATAAA\nGATAAC\nGATAAG\nGATAAT\nGATACA\nGATA
CC\nGATACG\nGATACT\nGATAGA\nGATAGC\nGATAGG\nGATAGT\nGATATA\nGATATC\nGATATG\nGATATT\nGATCAA\nGATCAC\nGATCAG\nGATCAT\nGATCCA\nGATCCC\nGATCCG\nGATCCT\nGATCGA\nGATCGC\nGATCGG\nGATCGT\nGATCTA\nGATCTC\nGATCTG\nGATCTT\nGATGAA\nGATGAC\nGATGAG\nGATGAT\nGATGCA\nGATGCC\nGATGCG\nGATGCT\nGATGGA\nGATGGC\nGATGGG\nGATGGT\nGATGTA\nGATGTC\nGATGTG\nGATGTT\nGATTAA\nGATTAC\nGATTAG\nGATTAT\nGATTCA\nGATTCC\nGATTCG\nGATTCT\nGATTGA\nGATTGC\nGATTGG\nGATTGT\nGATTTA\nGATTTC\nGATTTG\nGATTTT\nGCAAAA\nGCAAAC\nGCAAAG\nGCAAAT\nGCAACA\nGCAACC\nGCAACG\nGCAACT\nGCAAGA\nGCAAGC\nGCAAGG\nGCAAGT\nGCAATA\nGCAATC\nGCAATG\nGCAATT\nGCACAA\nGCACAC\nGCACAG\nGCACAT\nGCACCA\nGCACCC\nGCACCG\nGCACCT\nGCACGA\nGCACGC\nGCACGG\nGCACGT\nGCACTA\nGCACTC\nGCACTG\nGCACTT\nGCAGAA\nGCAGAC\nGCAGAG\nGCAGAT\nGCAGCA\nGCAGCC\nGCAGCG\nGCAGCT\nGCAGGA\nGCAGGC\nGCAGGG\nGCAGGT\nGCAGTA\nGCAGTC\nGCAGTG\nGCAGTT\nGCATAA\nGCATAC\nGCATAG\nGCATAT\nGCATCA\nGCATCC\nGCATCG\nGCATCT\nGCATGA\nGCATGC\nGCATGG\nGCATGT\nGCATTA\nGCATTC\nGCATTG\nGCATTT\nGCCAAA\nGCCAAC\nGCCAAG\nGCCAAT\nGCCACA\nGCCACC\nGCCACG\nGCCACT\nGCCAGA\nGCCAGC\nGCCAGG\nGCCAGT\nGCCATA\nGCCATC\nGCCATG\nGCCATT\nGCCCAA\nGCCCAC\nGCCCAG\nGCCCAT\nGCCCCA\nGCCCCC\nGCCCCG\nGCCCCT\nGCCCGA\nGCCCGC\nGCCCGG\nGCCCGT\nGCCCTA\nGCCCTC\nGCCCTG\nGCCCTT\nGCCGAA\nGCCGAC\nGCCGAG\nGCCGAT\nGCCGCA\nGCCGCC\nGCCGCG\nGCCGCT\nGCCGGA\nGCCGGC\nGCCGGG\nGCCGGT\nGCCGTA\nGCCGTC\nGCCGTG\nGCCGTT\nGCCTAA\nGCCTAC\nGCCTAG\nGCCTAT\nGCCTCA\nGCCTCC\nGCCTCG\nGCCTCT\nGCCTGA\nGCCTGC\nGCCTGG\nGCCTGT\nGCCTTA\nGCCTTC\nGCCTTG\nGCCTTT\nGCGAAA\nGCGAAC\nGCGAAG\nGCGAAT\nGCGACA\nGCGACC\nGCGACG\nGCGACT\nGCGAGA\nGCGAGC\nGCGAGG\nGCGAGT\nGCGATA\nGCGATC\nGCGATG\nGCGATT\nGCGCAA\nGCGCAC\nGCGCAG\nGCGCAT\nGCGCCA\nGCGCCC\nGCGCCG\nGCGCCT\nGCGCGA\nGCGCGC\nGCGCGG\nGCGCGT\nGCGCTA\nGCGCTC\nGCGCTG\nGCGCTT\nGCGGAA\nGCGGAC\nGCGGAG\nGCGGAT\nGCGGCA\nGCGGCC\nGCGGCG\nGCGGCT\nGCGGGA\nGCGGGC\nGCGGGG\nGCGGGT\nGCGGTA\nGCGGTC\nGCGGTG\nGCGGTT\nGCGTAA\nGCGTAC\nGCGTAG\nGCGTAT\nGCGTCA\nGCGTCC\nGCGTCG\nGCGTCT\nGCGTGA\nGCGTGC\nGCGTGG\nGCGTGT\nGCGTTA\nGCGTTC\nGCGTTG\nGCGT
TT\nGCTAAA\nGCTAAC\nGCTAAG\nGCTAAT\nGCTACA\nGCTACC\nGCTACG\nGCTACT\nGCTAGA\nGCTAGC\nGCTAGG\nGCTAGT\nGCTATA\nGCTATC\nGCTATG\nGCTATT\nGCTCAA\nGCTCAC\nGCTCAG\nGCTCAT\nGCTCCA\nGCTCCC\nGCTCCG\nGCTCCT\nGCTCGA\nGCTCGC\nGCTCGG\nGCTCGT\nGCTCTA\nGCTCTC\nGCTCTG\nGCTCTT\nGCTGAA\nGCTGAC\nGCTGAG\nGCTGAT\nGCTGCA\nGCTGCC\nGCTGCG\nGCTGCT\nGCTGGA\nGCTGGC\nGCTGGG\nGCTGGT\nGCTGTA\nGCTGTC\nGCTGTG\nGCTGTT\nGCTTAA\nGCTTAC\nGCTTAG\nGCTTAT\nGCTTCA\nGCTTCC\nGCTTCG\nGCTTCT\nGCTTGA\nGCTTGC\nGCTTGG\nGCTTGT\nGCTTTA\nGCTTTC\nGCTTTG\nGCTTTT\nGGAAAA\nGGAAAC\nGGAAAG\nGGAAAT\nGGAACA\nGGAACC\nGGAACG\nGGAACT\nGGAAGA\nGGAAGC\nGGAAGG\nGGAAGT\nGGAATA\nGGAATC\nGGAATG\nGGAATT\nGGACAA\nGGACAC\nGGACAG\nGGACAT\nGGACCA\nGGACCC\nGGACCG\nGGACCT\nGGACGA\nGGACGC\nGGACGG\nGGACGT\nGGACTA\nGGACTC\nGGACTG\nGGACTT\nGGAGAA\nGGAGAC\nGGAGAG\nGGAGAT\nGGAGCA\nGGAGCC\nGGAGCG\nGGAGCT\nGGAGGA\nGGAGGC\nGGAGGG\nGGAGGT\nGGAGTA\nGGAGTC\nGGAGTG\nGGAGTT\nGGATAA\nGGATAC\nGGATAG\nGGATAT\nGGATCA\nGGATCC\nGGATCG\nGGATCT\nGGATGA\nGGATGC\nGGATGG\nGGATGT\nGGATTA\nGGATTC\nGGATTG\nGGATTT\nGGCAAA\nGGCAAC\nGGCAAG\nGGCAAT\nGGCACA\nGGCACC\nGGCACG\nGGCACT\nGGCAGA\nGGCAGC\nGGCAGG\nGGCAGT\nGGCATA\nGGCATC\nGGCATG\nGGCATT\nGGCCAA\nGGCCAC\nGGCCAG\nGGCCAT\nGGCCCA\nGGCCCC\nGGCCCG\nGGCCCT\nGGCCGA\nGGCCGC\nGGCCGG\nGGCCGT\nGGCCTA\nGGCCTC\nGGCCTG\nGGCCTT\nGGCGAA\nGGCGAC\nGGCGAG\nGGCGAT\nGGCGCA\nGGCGCC\nGGCGCG\nGGCGCT\nGGCGGA\nGGCGGC\nGGCGGG\nGGCGGT\nGGCGTA\nGGCGTC\nGGCGTG\nGGCGTT\nGGCTAA\nGGCTAC\nGGCTAG\nGGCTAT\nGGCTCA\nGGCTCC\nGGCTCG\nGGCTCT\nGGCTGA\nGGCTGC\nGGCTGG\nGGCTGT\nGGCTTA\nGGCTTC\nGGCTTG\nGGCTTT\nGGGAAA\nGGGAAC\nGGGAAG\nGGGAAT\nGGGACA\nGGGACC\nGGGACG\nGGGACT\nGGGAGA\nGGGAGC\nGGGAGG\nGGGAGT\nGGGATA\nGGGATC\nGGGATG\nGGGATT\nGGGCAA\nGGGCAC\nGGGCAG\nGGGCAT\nGGGCCA\nGGGCCC\nGGGCCG\nGGGCCT\nGGGCGA\nGGGCGC\nGGGCGG\nGGGCGT\nGGGCTA\nGGGCTC\nGGGCTG\nGGGCTT\nGGGGAA\nGGGGAC\nGGGGAG\nGGGGAT\nGGGGCA\nGGGGCC\nGGGGCG\nGGGGCT\nGGGGGA\nGGGGGC\nGGGGGG\nGGGGGT\nGGGGTA\nGGGGTC\nGGGGTG\nGGGGTT\nGGGTAA\nGGGTAC\nGGGTAG\nGGGTAT\nGGGTCA\nGGGTCC\nGGGTCG\nGGGTCT\nGGGTGA\nGGGT
GC\nGGGTGG\nGGGTGT\nGGGTTA\nGGGTTC\nGGGTTG\nGGGTTT\nGGTAAA\nGGTAAC\nGGTAAG\nGGTAAT\nGGTACA\nGGTACC\nGGTACG\nGGTACT\nGGTAGA\nGGTAGC\nGGTAGG\nGGTAGT\nGGTATA\nGGTATC\nGGTATG\nGGTATT\nGGTCAA\nGGTCAC\nGGTCAG\nGGTCAT\nGGTCCA\nGGTCCC\nGGTCCG\nGGTCCT\nGGTCGA\nGGTCGC\nGGTCGG\nGGTCGT\nGGTCTA\nGGTCTC\nGGTCTG\nGGTCTT\nGGTGAA\nGGTGAC\nGGTGAG\nGGTGAT\nGGTGCA\nGGTGCC\nGGTGCG\nGGTGCT\nGGTGGA\nGGTGGC\nGGTGGG\nGGTGGT\nGGTGTA\nGGTGTC\nGGTGTG\nGGTGTT\nGGTTAA\nGGTTAC\nGGTTAG\nGGTTAT\nGGTTCA\nGGTTCC\nGGTTCG\nGGTTCT\nGGTTGA\nGGTTGC\nGGTTGG\nGGTTGT\nGGTTTA\nGGTTTC\nGGTTTG\nGGTTTT\nGTAAAA\nGTAAAC\nGTAAAG\nGTAAAT\nGTAACA\nGTAACC\nGTAACG\nGTAACT\nGTAAGA\nGTAAGC\nGTAAGG\nGTAAGT\nGTAATA\nGTAATC\nGTAATG\nGTAATT\nGTACAA\nGTACAC\nGTACAG\nGTACAT\nGTACCA\nGTACCC\nGTACCG\nGTACCT\nGTACGA\nGTACGC\nGTACGG\nGTACGT\nGTACTA\nGTACTC\nGTACTG\nGTACTT\nGTAGAA\nGTAGAC\nGTAGAG\nGTAGAT\nGTAGCA\nGTAGCC\nGTAGCG\nGTAGCT\nGTAGGA\nGTAGGC\nGTAGGG\nGTAGGT\nGTAGTA\nGTAGTC\nGTAGTG\nGTAGTT\nGTATAA\nGTATAC\nGTATAG\nGTATAT\nGTATCA\nGTATCC\nGTATCG\nGTATCT\nGTATGA\nGTATGC\nGTATGG\nGTATGT\nGTATTA\nGTATTC\nGTATTG\nGTATTT\nGTCAAA\nGTCAAC\nGTCAAG\nGTCAAT\nGTCACA\nGTCACC\nGTCACG\nGTCACT\nGTCAGA\nGTCAGC\nGTCAGG\nGTCAGT\nGTCATA\nGTCATC\nGTCATG\nGTCATT\nGTCCAA\nGTCCAC\nGTCCAG\nGTCCAT\nGTCCCA\nGTCCCC\nGTCCCG\nGTCCCT\nGTCCGA\nGTCCGC\nGTCCGG\nGTCCGT\nGTCCTA\nGTCCTC\nGTCCTG\nGTCCTT\nGTCGAA\nGTCGAC\nGTCGAG\nGTCGAT\nGTCGCA\nGTCGCC\nGTCGCG\nGTCGCT\nGTCGGA\nGTCGGC\nGTCGGG\nGTCGGT\nGTCGTA\nGTCGTC\nGTCGTG\nGTCGTT\nGTCTAA\nGTCTAC\nGTCTAG\nGTCTAT\nGTCTCA\nGTCTCC\nGTCTCG\nGTCTCT\nGTCTGA\nGTCTGC\nGTCTGG\nGTCTGT\nGTCTTA\nGTCTTC\nGTCTTG\nGTCTTT\nGTGAAA\nGTGAAC\nGTGAAG\nGTGAAT\nGTGACA\nGTGACC\nGTGACG\nGTGACT\nGTGAGA\nGTGAGC\nGTGAGG\nGTGAGT\nGTGATA\nGTGATC\nGTGATG\nGTGATT\nGTGCAA\nGTGCAC\nGTGCAG\nGTGCAT\nGTGCCA\nGTGCCC\nGTGCCG\nGTGCCT\nGTGCGA\nGTGCGC\nGTGCGG\nGTGCGT\nGTGCTA\nGTGCTC\nGTGCTG\nGTGCTT\nGTGGAA\nGTGGAC\nGTGGAG\nGTGGAT\nGTGGCA\nGTGGCC\nGTGGCG\nGTGGCT\nGTGGGA\nGTGGGC\nGTGGGG\nGTGGGT\nGTGGTA\nGTGGTC\nGTGGTG\nGTGGTT\nGTGTAA\nGTGTAC\nGTGTAG\nGTGT
AT\nGTGTCA\nGTGTCC\nGTGTCG\nGTGTCT\nGTGTGA\nGTGTGC\nGTGTGG\nGTGTGT\nGTGTTA\nGTGTTC\nGTGTTG\nGTGTTT\nGTTAAA\nGTTAAC\nGTTAAG\nGTTAAT\nGTTACA\nGTTACC\nGTTACG\nGTTACT\nGTTAGA\nGTTAGC\nGTTAGG\nGTTAGT\nGTTATA\nGTTATC\nGTTATG\nGTTATT\nGTTCAA\nGTTCAC\nGTTCAG\nGTTCAT\nGTTCCA\nGTTCCC\nGTTCCG\nGTTCCT\nGTTCGA\nGTTCGC\nGTTCGG\nGTTCGT\nGTTCTA\nGTTCTC\nGTTCTG\nGTTCTT\nGTTGAA\nGTTGAC\nGTTGAG\nGTTGAT\nGTTGCA\nGTTGCC\nGTTGCG\nGTTGCT\nGTTGGA\nGTTGGC\nGTTGGG\nGTTGGT\nGTTGTA\nGTTGTC\nGTTGTG\nGTTGTT\nGTTTAA\nGTTTAC\nGTTTAG\nGTTTAT\nGTTTCA\nGTTTCC\nGTTTCG\nGTTTCT\nGTTTGA\nGTTTGC\nGTTTGG\nGTTTGT\nGTTTTA\nGTTTTC\nGTTTTG\nGTTTTT\nTAAAAA\nTAAAAC\nTAAAAG\nTAAAAT\nTAAACA\nTAAACC\nTAAACG\nTAAACT\nTAAAGA\nTAAAGC\nTAAAGG\nTAAAGT\nTAAATA\nTAAATC\nTAAATG\nTAAATT\nTAACAA\nTAACAC\nTAACAG\nTAACAT\nTAACCA\nTAACCC\nTAACCG\nTAACCT\nTAACGA\nTAACGC\nTAACGG\nTAACGT\nTAACTA\nTAACTC\nTAACTG\nTAACTT\nTAAGAA\nTAAGAC\nTAAGAG\nTAAGAT\nTAAGCA\nTAAGCC\nTAAGCG\nTAAGCT\nTAAGGA\nTAAGGC\nTAAGGG\nTAAGGT\nTAAGTA\nTAAGTC\nTAAGTG\nTAAGTT\nTAATAA\nTAATAC\nTAATAG\nTAATAT\nTAATCA\nTAATCC\nTAATCG\nTAATCT\nTAATGA\nTAATGC\nTAATGG\nTAATGT\nTAATTA\nTAATTC\nTAATTG\nTAATTT\nTACAAA\nTACAAC\nTACAAG\nTACAAT\nTACACA\nTACACC\nTACACG\nTACACT\nTACAGA\nTACAGC\nTACAGG\nTACAGT\nTACATA\nTACATC\nTACATG\nTACATT\nTACCAA\nTACCAC\nTACCAG\nTACCAT\nTACCCA\nTACCCC\nTACCCG\nTACCCT\nTACCGA\nTACCGC\nTACCGG\nTACCGT\nTACCTA\nTACCTC\nTACCTG\nTACCTT\nTACGAA\nTACGAC\nTACGAG\nTACGAT\nTACGCA\nTACGCC\nTACGCG\nTACGCT\nTACGGA\nTACGGC\nTACGGG\nTACGGT\nTACGTA\nTACGTC\nTACGTG\nTACGTT\nTACTAA\nTACTAC\nTACTAG\nTACTAT\nTACTCA\nTACTCC\nTACTCG\nTACTCT\nTACTGA\nTACTGC\nTACTGG\nTACTGT\nTACTTA\nTACTTC\nTACTTG\nTACTTT\nTAGAAA\nTAGAAC\nTAGAAG\nTAGAAT\nTAGACA\nTAGACC\nTAGACG\nTAGACT\nTAGAGA\nTAGAGC\nTAGAGG\nTAGAGT\nTAGATA\nTAGATC\nTAGATG\nTAGATT\nTAGCAA\nTAGCAC\nTAGCAG\nTAGCAT\nTAGCCA\nTAGCCC\nTAGCCG\nTAGCCT\nTAGCGA\nTAGCGC\nTAGCGG\nTAGCGT\nTAGCTA\nTAGCTC\nTAGCTG\nTAGCTT\nTAGGAA\nTAGGAC\nTAGGAG\nTAGGAT\nTAGGCA\nTAGGCC\nTAGGCG\nTAGGCT\nTAGGGA\nTAGGGC\nTAGGGG\nTAGGGT\nTAGGTA\nTAGG
TC\nTAGGTG\nTAGGTT\nTAGTAA\nTAGTAC\nTAGTAG\nTAGTAT\nTAGTCA\nTAGTCC\nTAGTCG\nTAGTCT\nTAGTGA\nTAGTGC\nTAGTGG\nTAGTGT\nTAGTTA\nTAGTTC\nTAGTTG\nTAGTTT\nTATAAA\nTATAAC\nTATAAG\nTATAAT\nTATACA\nTATACC\nTATACG\nTATACT\nTATAGA\nTATAGC\nTATAGG\nTATAGT\nTATATA\nTATATC\nTATATG\nTATATT\nTATCAA\nTATCAC\nTATCAG\nTATCAT\nTATCCA\nTATCCC\nTATCCG\nTATCCT\nTATCGA\nTATCGC\nTATCGG\nTATCGT\nTATCTA\nTATCTC\nTATCTG\nTATCTT\nTATGAA\nTATGAC\nTATGAG\nTATGAT\nTATGCA\nTATGCC\nTATGCG\nTATGCT\nTATGGA\nTATGGC\nTATGGG\nTATGGT\nTATGTA\nTATGTC\nTATGTG\nTATGTT\nTATTAA\nTATTAC\nTATTAG\nTATTAT\nTATTCA\nTATTCC\nTATTCG\nTATTCT\nTATTGA\nTATTGC\nTATTGG\nTATTGT\nTATTTA\nTATTTC\nTATTTG\nTATTTT\nTCAAAA\nTCAAAC\nTCAAAG\nTCAAAT\nTCAACA\nTCAACC\nTCAACG\nTCAACT\nTCAAGA\nTCAAGC\nTCAAGG\nTCAAGT\nTCAATA\nTCAATC\nTCAATG\nTCAATT\nTCACAA\nTCACAC\nTCACAG\nTCACAT\nTCACCA\nTCACCC\nTCACCG\nTCACCT\nTCACGA\nTCACGC\nTCACGG\nTCACGT\nTCACTA\nTCACTC\nTCACTG\nTCACTT\nTCAGAA\nTCAGAC\nTCAGAG\nTCAGAT\nTCAGCA\nTCAGCC\nTCAGCG\nTCAGCT\nTCAGGA\nTCAGGC\nTCAGGG\nTCAGGT\nTCAGTA\nTCAGTC\nTCAGTG\nTCAGTT\nTCATAA\nTCATAC\nTCATAG\nTCATAT\nTCATCA\nTCATCC\nTCATCG\nTCATCT\nTCATGA\nTCATGC\nTCATGG\nTCATGT\nTCATTA\nTCATTC\nTCATTG\nTCATTT\nTCCAAA\nTCCAAC\nTCCAAG\nTCCAAT\nTCCACA\nTCCACC\nTCCACG\nTCCACT\nTCCAGA\nTCCAGC\nTCCAGG\nTCCAGT\nTCCATA\nTCCATC\nTCCATG\nTCCATT\nTCCCAA\nTCCCAC\nTCCCAG\nTCCCAT\nTCCCCA\nTCCCCC\nTCCCCG\nTCCCCT\nTCCCGA\nTCCCGC\nTCCCGG\nTCCCGT\nTCCCTA\nTCCCTC\nTCCCTG\nTCCCTT\nTCCGAA\nTCCGAC\nTCCGAG\nTCCGAT\nTCCGCA\nTCCGCC\nTCCGCG\nTCCGCT\nTCCGGA\nTCCGGC\nTCCGGG\nTCCGGT\nTCCGTA\nTCCGTC\nTCCGTG\nTCCGTT\nTCCTAA\nTCCTAC\nTCCTAG\nTCCTAT\nTCCTCA\nTCCTCC\nTCCTCG\nTCCTCT\nTCCTGA\nTCCTGC\nTCCTGG\nTCCTGT\nTCCTTA\nTCCTTC\nTCCTTG\nTCCTTT\nTCGAAA\nTCGAAC\nTCGAAG\nTCGAAT\nTCGACA\nTCGACC\nTCGACG\nTCGACT\nTCGAGA\nTCGAGC\nTCGAGG\nTCGAGT\nTCGATA\nTCGATC\nTCGATG\nTCGATT\nTCGCAA\nTCGCAC\nTCGCAG\nTCGCAT\nTCGCCA\nTCGCCC\nTCGCCG\nTCGCCT\nTCGCGA\nTCGCGC\nTCGCGG\nTCGCGT\nTCGCTA\nTCGCTC\nTCGCTG\nTCGCTT\nTCGGAA\nTCGGAC\nTCGGAG\nTCGGAT\nTCGGCA\nTCGGCC\nTCGGCG\nTCGG
CT\nTCGGGA\nTCGGGC\nTCGGGG\nTCGGGT\nTCGGTA\nTCGGTC\nTCGGTG\nTCGGTT\nTCGTAA\nTCGTAC\nTCGTAG\nTCGTAT\nTCGTCA\nTCGTCC\nTCGTCG\nTCGTCT\nTCGTGA\nTCGTGC\nTCGTGG\nTCGTGT\nTCGTTA\nTCGTTC\nTCGTTG\nTCGTTT\nTCTAAA\nTCTAAC\nTCTAAG\nTCTAAT\nTCTACA\nTCTACC\nTCTACG\nTCTACT\nTCTAGA\nTCTAGC\nTCTAGG\nTCTAGT\nTCTATA\nTCTATC\nTCTATG\nTCTATT\nTCTCAA\nTCTCAC\nTCTCAG\nTCTCAT\nTCTCCA\nTCTCCC\nTCTCCG\nTCTCCT\nTCTCGA\nTCTCGC\nTCTCGG\nTCTCGT\nTCTCTA\nTCTCTC\nTCTCTG\nTCTCTT\nTCTGAA\nTCTGAC\nTCTGAG\nTCTGAT\nTCTGCA\nTCTGCC\nTCTGCG\nTCTGCT\nTCTGGA\nTCTGGC\nTCTGGG\nTCTGGT\nTCTGTA\nTCTGTC\nTCTGTG\nTCTGTT\nTCTTAA\nTCTTAC\nTCTTAG\nTCTTAT\nTCTTCA\nTCTTCC\nTCTTCG\nTCTTCT\nTCTTGA\nTCTTGC\nTCTTGG\nTCTTGT\nTCTTTA\nTCTTTC\nTCTTTG\nTCTTTT\nTGAAAA\nTGAAAC\nTGAAAG\nTGAAAT\nTGAACA\nTGAACC\nTGAACG\nTGAACT\nTGAAGA\nTGAAGC\nTGAAGG\nTGAAGT\nTGAATA\nTGAATC\nTGAATG\nTGAATT\nTGACAA\nTGACAC\nTGACAG\nTGACAT\nTGACCA\nTGACCC\nTGACCG\nTGACCT\nTGACGA\nTGACGC\nTGACGG\nTGACGT\nTGACTA\nTGACTC\nTGACTG\nTGACTT\nTGAGAA\nTGAGAC\nTGAGAG\nTGAGAT\nTGAGCA\nTGAGCC\nTGAGCG\nTGAGCT\nTGAGGA\nTGAGGC\nTGAGGG\nTGAGGT\nTGAGTA\nTGAGTC\nTGAGTG\nTGAGTT\nTGATAA\nTGATAC\nTGATAG\nTGATAT\nTGATCA\nTGATCC\nTGATCG\nTGATCT\nTGATGA\nTGATGC\nTGATGG\nTGATGT\nTGATTA\nTGATTC\nTGATTG\nTGATTT\nTGCAAA\nTGCAAC\nTGCAAG\nTGCAAT\nTGCACA\nTGCACC\nTGCACG\nTGCACT\nTGCAGA\nTGCAGC\nTGCAGG\nTGCAGT\nTGCATA\nTGCATC\nTGCATG\nTGCATT\nTGCCAA\nTGCCAC\nTGCCAG\nTGCCAT\nTGCCCA\nTGCCCC\nTGCCCG\nTGCCCT\nTGCCGA\nTGCCGC\nTGCCGG\nTGCCGT\nTGCCTA\nTGCCTC\nTGCCTG\nTGCCTT\nTGCGAA\nTGCGAC\nTGCGAG\nTGCGAT\nTGCGCA\nTGCGCC\nTGCGCG\nTGCGCT\nTGCGGA\nTGCGGC\nTGCGGG\nTGCGGT\nTGCGTA\nTGCGTC\nTGCGTG\nTGCGTT\nTGCTAA\nTGCTAC\nTGCTAG\nTGCTAT\nTGCTCA\nTGCTCC\nTGCTCG\nTGCTCT\nTGCTGA\nTGCTGC\nTGCTGG\nTGCTGT\nTGCTTA\nTGCTTC\nTGCTTG\nTGCTTT\nTGGAAA\nTGGAAC\nTGGAAG\nTGGAAT\nTGGACA\nTGGACC\nTGGACG\nTGGACT\nTGGAGA\nTGGAGC\nTGGAGG\nTGGAGT\nTGGATA\nTGGATC\nTGGATG\nTGGATT\nTGGCAA\nTGGCAC\nTGGCAG\nTGGCAT\nTGGCCA\nTGGCCC\nTGGCCG\nTGGCCT\nTGGCGA\nTGGCGC\nTGGCGG\nTGGCGT\nTGGCTA\nTGGCTC\nTGGCTG\nTGGCTT\nTGGGAA\nTGGG
AC\nTGGGAG\nTGGGAT\nTGGGCA\nTGGGCC\nTGGGCG\nTGGGCT\nTGGGGA\nTGGGGC\nTGGGGG\nTGGGGT\nTGGGTA\nTGGGTC\nTGGGTG\nTGGGTT\nTGGTAA\nTGGTAC\nTGGTAG\nTGGTAT\nTGGTCA\nTGGTCC\nTGGTCG\nTGGTCT\nTGGTGA\nTGGTGC\nTGGTGG\nTGGTGT\nTGGTTA\nTGGTTC\nTGGTTG\nTGGTTT\nTGTAAA\nTGTAAC\nTGTAAG\nTGTAAT\nTGTACA\nTGTACC\nTGTACG\nTGTACT\nTGTAGA\nTGTAGC\nTGTAGG\nTGTAGT\nTGTATA\nTGTATC\nTGTATG\nTGTATT\nTGTCAA\nTGTCAC\nTGTCAG\nTGTCAT\nTGTCCA\nTGTCCC\nTGTCCG\nTGTCCT\nTGTCGA\nTGTCGC\nTGTCGG\nTGTCGT\nTGTCTA\nTGTCTC\nTGTCTG\nTGTCTT\nTGTGAA\nTGTGAC\nTGTGAG\nTGTGAT\nTGTGCA\nTGTGCC\nTGTGCG\nTGTGCT\nTGTGGA\nTGTGGC\nTGTGGG\nTGTGGT\nTGTGTA\nTGTGTC\nTGTGTG\nTGTGTT\nTGTTAA\nTGTTAC\nTGTTAG\nTGTTAT\nTGTTCA\nTGTTCC\nTGTTCG\nTGTTCT\nTGTTGA\nTGTTGC\nTGTTGG\nTGTTGT\nTGTTTA\nTGTTTC\nTGTTTG\nTGTTTT\nTTAAAA\nTTAAAC\nTTAAAG\nTTAAAT\nTTAACA\nTTAACC\nTTAACG\nTTAACT\nTTAAGA\nTTAAGC\nTTAAGG\nTTAAGT\nTTAATA\nTTAATC\nTTAATG\nTTAATT\nTTACAA\nTTACAC\nTTACAG\nTTACAT\nTTACCA\nTTACCC\nTTACCG\nTTACCT\nTTACGA\nTTACGC\nTTACGG\nTTACGT\nTTACTA\nTTACTC\nTTACTG\nTTACTT\nTTAGAA\nTTAGAC\nTTAGAG\nTTAGAT\nTTAGCA\nTTAGCC\nTTAGCG\nTTAGCT\nTTAGGA\nTTAGGC\nTTAGGG\nTTAGGT\nTTAGTA\nTTAGTC\nTTAGTG\nTTAGTT\nTTATAA\nTTATAC\nTTATAG\nTTATAT\nTTATCA\nTTATCC\nTTATCG\nTTATCT\nTTATGA\nTTATGC\nTTATGG\nTTATGT\nTTATTA\nTTATTC\nTTATTG\nTTATTT\nTTCAAA\nTTCAAC\nTTCAAG\nTTCAAT\nTTCACA\nTTCACC\nTTCACG\nTTCACT\nTTCAGA\nTTCAGC\nTTCAGG\nTTCAGT\nTTCATA\nTTCATC\nTTCATG\nTTCATT\nTTCCAA\nTTCCAC\nTTCCAG\nTTCCAT\nTTCCCA\nTTCCCC\nTTCCCG\nTTCCCT\nTTCCGA\nTTCCGC\nTTCCGG\nTTCCGT\nTTCCTA\nTTCCTC\nTTCCTG\nTTCCTT\nTTCGAA\nTTCGAC\nTTCGAG\nTTCGAT\nTTCGCA\nTTCGCC\nTTCGCG\nTTCGCT\nTTCGGA\nTTCGGC\nTTCGGG\nTTCGGT\nTTCGTA\nTTCGTC\nTTCGTG\nTTCGTT\nTTCTAA\nTTCTAC\nTTCTAG\nTTCTAT\nTTCTCA\nTTCTCC\nTTCTCG\nTTCTCT\nTTCTGA\nTTCTGC\nTTCTGG\nTTCTGT\nTTCTTA\nTTCTTC\nTTCTTG\nTTCTTT\nTTGAAA\nTTGAAC\nTTGAAG\nTTGAAT\nTTGACA\nTTGACC\nTTGACG\nTTGACT\nTTGAGA\nTTGAGC\nTTGAGG\nTTGAGT\nTTGATA\nTTGATC\nTTGATG\nTTGATT\nTTGCAA\nTTGCAC\nTTGCAG\nTTGCAT\nTTGCCA\nTTGCCC\nTTGCCG\nTTGCCT\nTTGCGA\nTTGCGC\nTTGCGG\nTTGC
GT\nTTGCTA\nTTGCTC\nTTGCTG\nTTGCTT\nTTGGAA\nTTGGAC\nTTGGAG\nTTGGAT\nTTGGCA\nTTGGCC\nTTGGCG\nTTGGCT\nTTGGGA\nTTGGGC\nTTGGGG\nTTGGGT\nTTGGTA\nTTGGTC\nTTGGTG\nTTGGTT\nTTGTAA\nTTGTAC\nTTGTAG\nTTGTAT\nTTGTCA\nTTGTCC\nTTGTCG\nTTGTCT\nTTGTGA\nTTGTGC\nTTGTGG\nTTGTGT\nTTGTTA\nTTGTTC\nTTGTTG\nTTGTTT\nTTTAAA\nTTTAAC\nTTTAAG\nTTTAAT\nTTTACA\nTTTACC\nTTTACG\nTTTACT\nTTTAGA\nTTTAGC\nTTTAGG\nTTTAGT\nTTTATA\nTTTATC\nTTTATG\nTTTATT\nTTTCAA\nTTTCAC\nTTTCAG\nTTTCAT\nTTTCCA\nTTTCCC\nTTTCCG\nTTTCCT\nTTTCGA\nTTTCGC\nTTTCGG\nTTTCGT\nTTTCTA\nTTTCTC\nTTTCTG\nTTTCTT\nTTTGAA\nTTTGAC\nTTTGAG\nTTTGAT\nTTTGCA\nTTTGCC\nTTTGCG\nTTTGCT\nTTTGGA\nTTTGGC\nTTTGGG\nTTTGGT\nTTTGTA\nTTTGTC\nTTTGTG\nTTTGTT\nTTTTAA\nTTTTAC\nTTTTAG\nTTTTAT\nTTTTCA\nTTTTCC\nTTTTCG\nTTTTCT\nTTTTGA\nTTTTGC\nTTTTGG\nTTTTGT\nTTTTTA\nTTTTTC\nTTTTTG\nTTTTTT\nNNNNNN\n
|