Upload tokenizer
1b0c5d3
verified
|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<MASK>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "<CLS>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "<UNK>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "Whitespace" |
|
}, |
|
"post_processor": null, |
|
"decoder": null, |
|
"model": { |
|
"type": "WordLevel", |
|
"vocab": { |
|
"<MASK>": 0, |
|
"<CLS>": 1, |
|
"<UNK>": 2, |
|
"AAAA": 3, |
|
"AAAC": 4, |
|
"AAAG": 5, |
|
"AAAT": 6, |
|
"AACA": 7, |
|
"AACC": 8, |
|
"AACG": 9, |
|
"AACT": 10, |
|
"AAGA": 11, |
|
"AAGC": 12, |
|
"AAGG": 13, |
|
"AAGT": 14, |
|
"AATA": 15, |
|
"AATC": 16, |
|
"AATG": 17, |
|
"AATT": 18, |
|
"ACAA": 19, |
|
"ACAC": 20, |
|
"ACAG": 21, |
|
"ACAT": 22, |
|
"ACCA": 23, |
|
"ACCC": 24, |
|
"ACCG": 25, |
|
"ACCT": 26, |
|
"ACGA": 27, |
|
"ACGC": 28, |
|
"ACGG": 29, |
|
"ACGT": 30, |
|
"ACTA": 31, |
|
"ACTC": 32, |
|
"ACTG": 33, |
|
"ACTT": 34, |
|
"AGAA": 35, |
|
"AGAC": 36, |
|
"AGAG": 37, |
|
"AGAT": 38, |
|
"AGCA": 39, |
|
"AGCC": 40, |
|
"AGCG": 41, |
|
"AGCT": 42, |
|
"AGGA": 43, |
|
"AGGC": 44, |
|
"AGGG": 45, |
|
"AGGT": 46, |
|
"AGTA": 47, |
|
"AGTC": 48, |
|
"AGTG": 49, |
|
"AGTT": 50, |
|
"ATAA": 51, |
|
"ATAC": 52, |
|
"ATAG": 53, |
|
"ATAT": 54, |
|
"ATCA": 55, |
|
"ATCC": 56, |
|
"ATCG": 57, |
|
"ATCT": 58, |
|
"ATGA": 59, |
|
"ATGC": 60, |
|
"ATGG": 61, |
|
"ATGT": 62, |
|
"ATTA": 63, |
|
"ATTC": 64, |
|
"ATTG": 65, |
|
"ATTT": 66, |
|
"CAAA": 67, |
|
"CAAC": 68, |
|
"CAAG": 69, |
|
"CAAT": 70, |
|
"CACA": 71, |
|
"CACC": 72, |
|
"CACG": 73, |
|
"CACT": 74, |
|
"CAGA": 75, |
|
"CAGC": 76, |
|
"CAGG": 77, |
|
"CAGT": 78, |
|
"CATA": 79, |
|
"CATC": 80, |
|
"CATG": 81, |
|
"CATT": 82, |
|
"CCAA": 83, |
|
"CCAC": 84, |
|
"CCAG": 85, |
|
"CCAT": 86, |
|
"CCCA": 87, |
|
"CCCC": 88, |
|
"CCCG": 89, |
|
"CCCT": 90, |
|
"CCGA": 91, |
|
"CCGC": 92, |
|
"CCGG": 93, |
|
"CCGT": 94, |
|
"CCTA": 95, |
|
"CCTC": 96, |
|
"CCTG": 97, |
|
"CCTT": 98, |
|
"CGAA": 99, |
|
"CGAC": 100, |
|
"CGAG": 101, |
|
"CGAT": 102, |
|
"CGCA": 103, |
|
"CGCC": 104, |
|
"CGCG": 105, |
|
"CGCT": 106, |
|
"CGGA": 107, |
|
"CGGC": 108, |
|
"CGGG": 109, |
|
"CGGT": 110, |
|
"CGTA": 111, |
|
"CGTC": 112, |
|
"CGTG": 113, |
|
"CGTT": 114, |
|
"CTAA": 115, |
|
"CTAC": 116, |
|
"CTAG": 117, |
|
"CTAT": 118, |
|
"CTCA": 119, |
|
"CTCC": 120, |
|
"CTCG": 121, |
|
"CTCT": 122, |
|
"CTGA": 123, |
|
"CTGC": 124, |
|
"CTGG": 125, |
|
"CTGT": 126, |
|
"CTTA": 127, |
|
"CTTC": 128, |
|
"CTTG": 129, |
|
"CTTT": 130, |
|
"GAAA": 131, |
|
"GAAC": 132, |
|
"GAAG": 133, |
|
"GAAT": 134, |
|
"GACA": 135, |
|
"GACC": 136, |
|
"GACG": 137, |
|
"GACT": 138, |
|
"GAGA": 139, |
|
"GAGC": 140, |
|
"GAGG": 141, |
|
"GAGT": 142, |
|
"GATA": 143, |
|
"GATC": 144, |
|
"GATG": 145, |
|
"GATT": 146, |
|
"GCAA": 147, |
|
"GCAC": 148, |
|
"GCAG": 149, |
|
"GCAT": 150, |
|
"GCCA": 151, |
|
"GCCC": 152, |
|
"GCCG": 153, |
|
"GCCT": 154, |
|
"GCGA": 155, |
|
"GCGC": 156, |
|
"GCGG": 157, |
|
"GCGT": 158, |
|
"GCTA": 159, |
|
"GCTC": 160, |
|
"GCTG": 161, |
|
"GCTT": 162, |
|
"GGAA": 163, |
|
"GGAC": 164, |
|
"GGAG": 165, |
|
"GGAT": 166, |
|
"GGCA": 167, |
|
"GGCC": 168, |
|
"GGCG": 169, |
|
"GGCT": 170, |
|
"GGGA": 171, |
|
"GGGC": 172, |
|
"GGGG": 173, |
|
"GGGT": 174, |
|
"GGTA": 175, |
|
"GGTC": 176, |
|
"GGTG": 177, |
|
"GGTT": 178, |
|
"GTAA": 179, |
|
"GTAC": 180, |
|
"GTAG": 181, |
|
"GTAT": 182, |
|
"GTCA": 183, |
|
"GTCC": 184, |
|
"GTCG": 185, |
|
"GTCT": 186, |
|
"GTGA": 187, |
|
"GTGC": 188, |
|
"GTGG": 189, |
|
"GTGT": 190, |
|
"GTTA": 191, |
|
"GTTC": 192, |
|
"GTTG": 193, |
|
"GTTT": 194, |
|
"TAAA": 195, |
|
"TAAC": 196, |
|
"TAAG": 197, |
|
"TAAT": 198, |
|
"TACA": 199, |
|
"TACC": 200, |
|
"TACG": 201, |
|
"TACT": 202, |
|
"TAGA": 203, |
|
"TAGC": 204, |
|
"TAGG": 205, |
|
"TAGT": 206, |
|
"TATA": 207, |
|
"TATC": 208, |
|
"TATG": 209, |
|
"TATT": 210, |
|
"TCAA": 211, |
|
"TCAC": 212, |
|
"TCAG": 213, |
|
"TCAT": 214, |
|
"TCCA": 215, |
|
"TCCC": 216, |
|
"TCCG": 217, |
|
"TCCT": 218, |
|
"TCGA": 219, |
|
"TCGC": 220, |
|
"TCGG": 221, |
|
"TCGT": 222, |
|
"TCTA": 223, |
|
"TCTC": 224, |
|
"TCTG": 225, |
|
"TCTT": 226, |
|
"TGAA": 227, |
|
"TGAC": 228, |
|
"TGAG": 229, |
|
"TGAT": 230, |
|
"TGCA": 231, |
|
"TGCC": 232, |
|
"TGCG": 233, |
|
"TGCT": 234, |
|
"TGGA": 235, |
|
"TGGC": 236, |
|
"TGGG": 237, |
|
"TGGT": 238, |
|
"TGTA": 239, |
|
"TGTC": 240, |
|
"TGTG": 241, |
|
"TGTT": 242, |
|
"TTAA": 243, |
|
"TTAC": 244, |
|
"TTAG": 245, |
|
"TTAT": 246, |
|
"TTCA": 247, |
|
"TTCC": 248, |
|
"TTCG": 249, |
|
"TTCT": 250, |
|
"TTGA": 251, |
|
"TTGC": 252, |
|
"TTGG": 253, |
|
"TTGT": 254, |
|
"TTTA": 255, |
|
"TTTC": 256, |
|
"TTTG": 257, |
|
"TTTT": 258 |
|
}, |
|
"unk_token": "<UNK>" |
|
} |
|
} |