HERIUN's picture
Upload tokenizer
2ecd5ef verified
raw
history blame
12.3 kB
{
"[PAD]": 887,
"[UNK]": 886,
"k": 1,
"|": 0,
"가": 2,
"각": 3,
"간": 4,
"갈": 5,
"감": 6,
"갑": 7,
"값": 8,
"갔": 9,
"강": 10,
"갖": 11,
"같": 12,
"갚": 13,
"갛": 14,
"개": 15,
"객": 16,
"갤": 17,
"거": 18,
"걱": 19,
"건": 20,
"걷": 21,
"걸": 22,
"검": 23,
"겁": 24,
"것": 25,
"게": 26,
"겠": 27,
"겨": 28,
"격": 29,
"견": 30,
"결": 31,
"겼": 32,
"경": 33,
"곁": 34,
"계": 35,
"고": 36,
"곡": 37,
"곤": 38,
"곧": 39,
"골": 40,
"곰": 41,
"곱": 42,
"곳": 43,
"공": 44,
"과": 45,
"관": 46,
"광": 47,
"괜": 48,
"괴": 49,
"굉": 50,
"교": 51,
"구": 52,
"국": 53,
"군": 54,
"굳": 55,
"굴": 56,
"굽": 57,
"궁": 58,
"권": 59,
"귀": 60,
"귓": 61,
"규": 62,
"그": 63,
"근": 64,
"글": 65,
"긁": 66,
"금": 67,
"급": 68,
"긋": 69,
"기": 70,
"긴": 71,
"길": 72,
"김": 73,
"깃": 74,
"깊": 75,
"까": 76,
"깍": 77,
"깐": 78,
"깔": 79,
"깜": 80,
"깨": 81,
"꺼": 82,
"껀": 83,
"껄": 84,
"껏": 85,
"껑": 86,
"께": 87,
"껴": 88,
"꼈": 89,
"꼬": 90,
"꼭": 91,
"꼰": 92,
"꼴": 93,
"꽁": 94,
"꽃": 95,
"꽉": 96,
"꾀": 97,
"꾸": 98,
"꾹": 99,
"꾼": 100,
"꿇": 101,
"꿈": 102,
"끄": 103,
"끈": 104,
"끊": 105,
"끌": 106,
"끓": 107,
"끔": 108,
"끗": 109,
"끝": 110,
"끼": 111,
"끽": 112,
"나": 113,
"낙": 114,
"난": 115,
"날": 116,
"남": 117,
"납": 118,
"났": 119,
"낮": 120,
"낯": 121,
"낱": 122,
"낳": 123,
"내": 124,
"낸": 125,
"냄": 126,
"냈": 127,
"냉": 128,
"냐": 129,
"냘": 130,
"냥": 131,
"너": 132,
"넉": 133,
"넌": 134,
"널": 135,
"넘": 136,
"넛": 137,
"넣": 138,
"네": 139,
"녀": 140,
"녁": 141,
"년": 142,
"념": 143,
"녕": 144,
"노": 145,
"녹": 146,
"논": 147,
"놀": 148,
"놈": 149,
"농": 150,
"높": 151,
"놓": 152,
"놔": 153,
"뇌": 154,
"누": 155,
"눈": 156,
"눌": 157,
"눔": 158,
"눕": 159,
"느": 160,
"는": 161,
"늘": 162,
"늙": 163,
"능": 164,
"늦": 165,
"니": 166,
"닉": 167,
"닌": 168,
"닐": 169,
"님": 170,
"다": 171,
"닥": 172,
"닦": 173,
"단": 174,
"닫": 175,
"달": 176,
"닭": 177,
"닮": 178,
"담": 179,
"답": 180,
"닷": 181,
"당": 182,
"닿": 183,
"대": 184,
"댓": 185,
"더": 186,
"덕": 187,
"던": 188,
"덤": 189,
"덥": 190,
"덧": 191,
"덮": 192,
"데": 193,
"덴": 194,
"도": 195,
"독": 196,
"돈": 197,
"돋": 198,
"돌": 199,
"동": 200,
"돼": 201,
"됐": 202,
"되": 203,
"된": 204,
"될": 205,
"됨": 206,
"됩": 207,
"두": 208,
"둑": 209,
"둘": 210,
"둣": 211,
"둥": 212,
"뒤": 213,
"뒷": 214,
"드": 215,
"득": 216,
"든": 217,
"듣": 218,
"들": 219,
"듬": 220,
"듭": 221,
"듯": 222,
"등": 223,
"디": 224,
"딘": 225,
"딜": 226,
"딧": 227,
"딪": 228,
"따": 229,
"딱": 230,
"딴": 231,
"딸": 232,
"땀": 233,
"땅": 234,
"때": 235,
"떠": 236,
"떡": 237,
"떤": 238,
"떨": 239,
"떴": 240,
"떻": 241,
"떼": 242,
"또": 243,
"똑": 244,
"똥": 245,
"뚜": 246,
"뚝": 247,
"뚫": 248,
"뚱": 249,
"뛰": 250,
"뜨": 251,
"뜩": 252,
"뜻": 253,
"띄": 254,
"띠": 255,
"띤": 256,
"라": 257,
"락": 258,
"란": 259,
"랄": 260,
"람": 261,
"랍": 262,
"랐": 263,
"랑": 264,
"랗": 265,
"래": 266,
"랫": 267,
"랬": 268,
"랴": 269,
"략": 270,
"량": 271,
"러": 272,
"럭": 273,
"런": 274,
"럴": 275,
"럼": 276,
"럽": 277,
"렀": 278,
"렁": 279,
"렇": 280,
"레": 281,
"렌": 282,
"려": 283,
"력": 284,
"련": 285,
"렷": 286,
"렸": 287,
"령": 288,
"례": 289,
"로": 290,
"록": 291,
"론": 292,
"롬": 293,
"롭": 294,
"롯": 295,
"뢰": 296,
"료": 297,
"룡": 298,
"루": 299,
"룩": 300,
"룹": 301,
"룽": 302,
"률": 303,
"륭": 304,
"르": 305,
"른": 306,
"를": 307,
"름": 308,
"릅": 309,
"릇": 310,
"릉": 311,
"릎": 312,
"리": 313,
"린": 314,
"릴": 315,
"림": 316,
"립": 317,
"릿": 318,
"마": 319,
"막": 320,
"만": 321,
"많": 322,
"말": 323,
"맘": 324,
"맙": 325,
"맛": 326,
"망": 327,
"맞": 328,
"맡": 329,
"매": 330,
"맨": 331,
"맹": 332,
"맺": 333,
"머": 334,
"먹": 335,
"먼": 336,
"멀": 337,
"멈": 338,
"멍": 339,
"메": 340,
"며": 341,
"면": 342,
"명": 343,
"몇": 344,
"모": 345,
"목": 346,
"몬": 347,
"몰": 348,
"몸": 349,
"몹": 350,
"못": 351,
"몽": 352,
"무": 353,
"묵": 354,
"문": 355,
"묻": 356,
"물": 357,
"뭇": 358,
"뭉": 359,
"뭍": 360,
"뭐": 361,
"뭘": 362,
"뮤": 363,
"므": 364,
"미": 365,
"민": 366,
"믿": 367,
"밀": 368,
"밑": 369,
"바": 370,
"박": 371,
"밖": 372,
"반": 373,
"받": 374,
"발": 375,
"밝": 376,
"밤": 377,
"밥": 378,
"방": 379,
"밭": 380,
"배": 381,
"백": 382,
"뱀": 383,
"뱁": 384,
"뱃": 385,
"뱄": 386,
"뱅": 387,
"버": 388,
"벅": 389,
"번": 390,
"벌": 391,
"범": 392,
"법": 393,
"벗": 394,
"베": 395,
"벼": 396,
"벽": 397,
"변": 398,
"별": 399,
"볍": 400,
"병": 401,
"볕": 402,
"보": 403,
"복": 404,
"본": 405,
"볼": 406,
"봄": 407,
"봅": 408,
"봉": 409,
"봐": 410,
"봤": 411,
"뵈": 412,
"부": 413,
"북": 414,
"분": 415,
"불": 416,
"붉": 417,
"붐": 418,
"붙": 419,
"비": 420,
"빈": 421,
"빌": 422,
"빙": 423,
"빚": 424,
"빛": 425,
"빠": 426,
"빨": 427,
"빼": 428,
"뺄": 429,
"뺑": 430,
"뺨": 431,
"뻔": 432,
"뻘": 433,
"뼈": 434,
"뼉": 435,
"뽑": 436,
"뾰": 437,
"뿌": 438,
"뿐": 439,
"뿡": 440,
"쁘": 441,
"사": 442,
"산": 443,
"살": 444,
"삶": 445,
"삼": 446,
"삿": 447,
"상": 448,
"새": 449,
"색": 450,
"샘": 451,
"생": 452,
"서": 453,
"석": 454,
"섞": 455,
"선": 456,
"설": 457,
"섬": 458,
"섭": 459,
"섯": 460,
"섰": 461,
"성": 462,
"세": 463,
"센": 464,
"셈": 465,
"셔": 466,
"셨": 467,
"소": 468,
"속": 469,
"손": 470,
"솔": 471,
"솜": 472,
"솟": 473,
"송": 474,
"쇼": 475,
"수": 476,
"숙": 477,
"순": 478,
"술": 479,
"숨": 480,
"숫": 481,
"쉬": 482,
"쉴": 483,
"스": 484,
"슥": 485,
"슨": 486,
"슬": 487,
"슴": 488,
"습": 489,
"슷": 490,
"승": 491,
"시": 492,
"식": 493,
"신": 494,
"실": 495,
"싫": 496,
"심": 497,
"십": 498,
"싯": 499,
"싶": 500,
"싸": 501,
"싹": 502,
"쌀": 503,
"쌍": 504,
"쌓": 505,
"써": 506,
"썩": 507,
"썼": 508,
"쎄": 509,
"쏘": 510,
"쏟": 511,
"쏴": 512,
"쑥": 513,
"쓰": 514,
"쓴": 515,
"쓸": 516,
"씀": 517,
"씌": 518,
"씨": 519,
"씩": 520,
"씬": 521,
"씻": 522,
"아": 523,
"악": 524,
"안": 525,
"앉": 526,
"않": 527,
"알": 528,
"암": 529,
"았": 530,
"앙": 531,
"앞": 532,
"애": 533,
"액": 534,
"앤": 535,
"야": 536,
"약": 537,
"양": 538,
"얕": 539,
"어": 540,
"억": 541,
"언": 542,
"얹": 543,
"얻": 544,
"얼": 545,
"얽": 546,
"엄": 547,
"업": 548,
"없": 549,
"엇": 550,
"었": 551,
"엉": 552,
"엌": 553,
"엎": 554,
"에": 555,
"엔": 556,
"엘": 557,
"여": 558,
"역": 559,
"연": 560,
"열": 561,
"엷": 562,
"염": 563,
"였": 564,
"영": 565,
"옆": 566,
"예": 567,
"옛": 568,
"오": 569,
"옥": 570,
"온": 571,
"올": 572,
"옮": 573,
"옵": 574,
"옷": 575,
"와": 576,
"완": 577,
"왔": 578,
"왕": 579,
"왜": 580,
"외": 581,
"요": 582,
"욕": 583,
"용": 584,
"우": 585,
"욱": 586,
"운": 587,
"울": 588,
"움": 589,
"웃": 590,
"웅": 591,
"워": 592,
"원": 593,
"월": 594,
"웠": 595,
"웬": 596,
"위": 597,
"윗": 598,
"유": 599,
"육": 600,
"윤": 601,
"으": 602,
"은": 603,
"을": 604,
"음": 605,
"읍": 606,
"응": 607,
"의": 608,
"이": 609,
"인": 610,
"일": 611,
"읽": 612,
"잃": 613,
"임": 614,
"입": 615,
"잇": 616,
"있": 617,
"잊": 618,
"자": 619,
"작": 620,
"잔": 621,
"잖": 622,
"잘": 623,
"잠": 624,
"잡": 625,
"장": 626,
"잦": 627,
"재": 628,
"쟁": 629,
"저": 630,
"적": 631,
"전": 632,
"절": 633,
"젊": 634,
"점": 635,
"젓": 636,
"정": 637,
"제": 638,
"젯": 639,
"져": 640,
"졌": 641,
"조": 642,
"족": 643,
"존": 644,
"졸": 645,
"좀": 646,
"좁": 647,
"종": 648,
"좋": 649,
"좌": 650,
"죄": 651,
"죠": 652,
"주": 653,
"죽": 654,
"준": 655,
"줄": 656,
"줌": 657,
"중": 658,
"줘": 659,
"쥐": 660,
"쥔": 661,
"즈": 662,
"즉": 663,
"즐": 664,
"즘": 665,
"증": 666,
"지": 667,
"직": 668,
"진": 669,
"질": 670,
"짐": 671,
"집": 672,
"짓": 673,
"징": 674,
"짖": 675,
"짚": 676,
"짜": 677,
"짝": 678,
"째": 679,
"쩌": 680,
"쩍": 681,
"쪼": 682,
"쪽": 683,
"쫑": 684,
"쫓": 685,
"쫙": 686,
"쭉": 687,
"쯤": 688,
"찌": 689,
"찔": 690,
"찡": 691,
"찢": 692,
"차": 693,
"착": 694,
"찬": 695,
"찮": 696,
"찰": 697,
"참": 698,
"찼": 699,
"창": 700,
"찾": 701,
"채": 702,
"책": 703,
"챘": 704,
"챙": 705,
"처": 706,
"척": 707,
"천": 708,
"철": 709,
"첨": 710,
"첩": 711,
"첫": 712,
"청": 713,
"체": 714,
"쳐": 715,
"쳤": 716,
"초": 717,
"촌": 718,
"촛": 719,
"총": 720,
"최": 721,
"추": 722,
"축": 723,
"춘": 724,
"출": 725,
"춤": 726,
"충": 727,
"춰": 728,
"취": 729,
"츠": 730,
"측": 731,
"츰": 732,
"층": 733,
"치": 734,
"칙": 735,
"친": 736,
"칠": 737,
"침": 738,
"칭": 739,
"카": 740,
"칵": 741,
"칼": 742,
"커": 743,
"컥": 744,
"컨": 745,
"컬": 746,
"케": 747,
"켓": 748,
"켕": 749,
"켜": 750,
"켰": 751,
"코": 752,
"콘": 753,
"쾌": 754,
"쿵": 755,
"퀴": 756,
"크": 757,
"큰": 758,
"클": 759,
"큼": 760,
"키": 761,
"킥": 762,
"킬": 763,
"타": 764,
"탄": 765,
"탐": 766,
"탓": 767,
"탔": 768,
"탕": 769,
"태": 770,
"택": 771,
"터": 772,
"턱": 773,
"털": 774,
"테": 775,
"텐": 776,
"토": 777,
"통": 778,
"퇴": 779,
"투": 780,
"툭": 781,
"툼": 782,
"퉁": 783,
"튀": 784,
"트": 785,
"특": 786,
"튼": 787,
"튿": 788,
"틀": 789,
"틈": 790,
"티": 791,
"파": 792,
"판": 793,
"팔": 794,
"팠": 795,
"팡": 796,
"패": 797,
"팽": 798,
"퍼": 799,
"페": 800,
"편": 801,
"폈": 802,
"평": 803,
"포": 804,
"폭": 805,
"폰": 806,
"폿": 807,
"표": 808,
"푸": 809,
"푹": 810,
"푼": 811,
"풀": 812,
"품": 813,
"풋": 814,
"프": 815,
"픈": 816,
"플": 817,
"픔": 818,
"피": 819,
"픽": 820,
"필": 821,
"핍": 822,
"핏": 823,
"핑": 824,
"하": 825,
"학": 826,
"한": 827,
"할": 828,
"함": 829,
"합": 830,
"항": 831,
"해": 832,
"핵": 833,
"햇": 834,
"했": 835,
"행": 836,
"향": 837,
"허": 838,
"헌": 839,
"헛": 840,
"헤": 841,
"혀": 842,
"혁": 843,
"현": 844,
"혔": 845,
"형": 846,
"혜": 847,
"호": 848,
"혹": 849,
"혼": 850,
"홀": 851,
"홍": 852,
"화": 853,
"확": 854,
"환": 855,
"활": 856,
"황": 857,
"회": 858,
"획": 859,
"횡": 860,
"효": 861,
"후": 862,
"훈": 863,
"훌": 864,
"훔": 865,
"훤": 866,
"훨": 867,
"휘": 868,
"휙": 869,
"휩": 870,
"흉": 871,
"흐": 872,
"흑": 873,
"흔": 874,
"흘": 875,
"흙": 876,
"흡": 877,
"흥": 878,
"흩": 879,
"희": 880,
"흰": 881,
"히": 882,
"힌": 883,
"힐": 884,
"힘": 885
}