HERIUN's picture
Upload tokenizer
1ef39d5 verified
raw
history blame
12.3 kB
{
"[PAD]": 886,
"[UNK]": 885,
"|": 0,
"가": 1,
"각": 2,
"간": 3,
"갈": 4,
"감": 5,
"갑": 6,
"값": 7,
"갔": 8,
"강": 9,
"갖": 10,
"같": 11,
"갚": 12,
"갛": 13,
"개": 14,
"객": 15,
"갤": 16,
"거": 17,
"걱": 18,
"건": 19,
"걷": 20,
"걸": 21,
"검": 22,
"겁": 23,
"것": 24,
"게": 25,
"겠": 26,
"겨": 27,
"격": 28,
"견": 29,
"결": 30,
"겼": 31,
"경": 32,
"곁": 33,
"계": 34,
"고": 35,
"곡": 36,
"곤": 37,
"곧": 38,
"골": 39,
"곰": 40,
"곱": 41,
"곳": 42,
"공": 43,
"과": 44,
"관": 45,
"광": 46,
"괜": 47,
"괴": 48,
"굉": 49,
"교": 50,
"구": 51,
"국": 52,
"군": 53,
"굳": 54,
"굴": 55,
"굽": 56,
"궁": 57,
"권": 58,
"귀": 59,
"귓": 60,
"규": 61,
"그": 62,
"근": 63,
"글": 64,
"긁": 65,
"금": 66,
"급": 67,
"긋": 68,
"기": 69,
"긴": 70,
"길": 71,
"김": 72,
"깃": 73,
"깊": 74,
"까": 75,
"깍": 76,
"깐": 77,
"깔": 78,
"깜": 79,
"깨": 80,
"꺼": 81,
"껀": 82,
"껄": 83,
"껏": 84,
"껑": 85,
"께": 86,
"껴": 87,
"꼈": 88,
"꼬": 89,
"꼭": 90,
"꼰": 91,
"꼴": 92,
"꽁": 93,
"꽃": 94,
"꽉": 95,
"꾀": 96,
"꾸": 97,
"꾹": 98,
"꾼": 99,
"꿇": 100,
"꿈": 101,
"끄": 102,
"끈": 103,
"끊": 104,
"끌": 105,
"끓": 106,
"끔": 107,
"끗": 108,
"끝": 109,
"끼": 110,
"끽": 111,
"나": 112,
"낙": 113,
"난": 114,
"날": 115,
"남": 116,
"납": 117,
"났": 118,
"낮": 119,
"낯": 120,
"낱": 121,
"낳": 122,
"내": 123,
"낸": 124,
"냄": 125,
"냈": 126,
"냉": 127,
"냐": 128,
"냘": 129,
"냥": 130,
"너": 131,
"넉": 132,
"넌": 133,
"널": 134,
"넘": 135,
"넛": 136,
"넣": 137,
"네": 138,
"녀": 139,
"녁": 140,
"년": 141,
"념": 142,
"녕": 143,
"노": 144,
"녹": 145,
"논": 146,
"놀": 147,
"놈": 148,
"농": 149,
"높": 150,
"놓": 151,
"놔": 152,
"뇌": 153,
"누": 154,
"눈": 155,
"눌": 156,
"눔": 157,
"눕": 158,
"느": 159,
"는": 160,
"늘": 161,
"늙": 162,
"능": 163,
"늦": 164,
"니": 165,
"닉": 166,
"닌": 167,
"닐": 168,
"님": 169,
"다": 170,
"닥": 171,
"닦": 172,
"단": 173,
"닫": 174,
"달": 175,
"닭": 176,
"닮": 177,
"담": 178,
"답": 179,
"닷": 180,
"당": 181,
"닿": 182,
"대": 183,
"댓": 184,
"더": 185,
"덕": 186,
"던": 187,
"덤": 188,
"덥": 189,
"덧": 190,
"덮": 191,
"데": 192,
"덴": 193,
"도": 194,
"독": 195,
"돈": 196,
"돋": 197,
"돌": 198,
"동": 199,
"돼": 200,
"됐": 201,
"되": 202,
"된": 203,
"될": 204,
"됨": 205,
"됩": 206,
"두": 207,
"둑": 208,
"둘": 209,
"둣": 210,
"둥": 211,
"뒤": 212,
"뒷": 213,
"드": 214,
"득": 215,
"든": 216,
"듣": 217,
"들": 218,
"듬": 219,
"듭": 220,
"듯": 221,
"등": 222,
"디": 223,
"딘": 224,
"딜": 225,
"딧": 226,
"딪": 227,
"따": 228,
"딱": 229,
"딴": 230,
"딸": 231,
"땀": 232,
"땅": 233,
"때": 234,
"떠": 235,
"떡": 236,
"떤": 237,
"떨": 238,
"떴": 239,
"떻": 240,
"떼": 241,
"또": 242,
"똑": 243,
"똥": 244,
"뚜": 245,
"뚝": 246,
"뚫": 247,
"뚱": 248,
"뛰": 249,
"뜨": 250,
"뜩": 251,
"뜻": 252,
"띄": 253,
"띠": 254,
"띤": 255,
"라": 256,
"락": 257,
"란": 258,
"랄": 259,
"람": 260,
"랍": 261,
"랐": 262,
"랑": 263,
"랗": 264,
"래": 265,
"랫": 266,
"랬": 267,
"랴": 268,
"략": 269,
"량": 270,
"러": 271,
"럭": 272,
"런": 273,
"럴": 274,
"럼": 275,
"럽": 276,
"렀": 277,
"렁": 278,
"렇": 279,
"레": 280,
"렌": 281,
"려": 282,
"력": 283,
"련": 284,
"렷": 285,
"렸": 286,
"령": 287,
"례": 288,
"로": 289,
"록": 290,
"론": 291,
"롬": 292,
"롭": 293,
"롯": 294,
"뢰": 295,
"료": 296,
"룡": 297,
"루": 298,
"룩": 299,
"룹": 300,
"룽": 301,
"률": 302,
"륭": 303,
"르": 304,
"른": 305,
"를": 306,
"름": 307,
"릅": 308,
"릇": 309,
"릉": 310,
"릎": 311,
"리": 312,
"린": 313,
"릴": 314,
"림": 315,
"립": 316,
"릿": 317,
"마": 318,
"막": 319,
"만": 320,
"많": 321,
"말": 322,
"맘": 323,
"맙": 324,
"맛": 325,
"망": 326,
"맞": 327,
"맡": 328,
"매": 329,
"맨": 330,
"맹": 331,
"맺": 332,
"머": 333,
"먹": 334,
"먼": 335,
"멀": 336,
"멈": 337,
"멍": 338,
"메": 339,
"며": 340,
"면": 341,
"명": 342,
"몇": 343,
"모": 344,
"목": 345,
"몬": 346,
"몰": 347,
"몸": 348,
"몹": 349,
"못": 350,
"몽": 351,
"무": 352,
"묵": 353,
"문": 354,
"묻": 355,
"물": 356,
"뭇": 357,
"뭉": 358,
"뭍": 359,
"뭐": 360,
"뭘": 361,
"뮤": 362,
"므": 363,
"미": 364,
"민": 365,
"믿": 366,
"밀": 367,
"밑": 368,
"바": 369,
"박": 370,
"밖": 371,
"반": 372,
"받": 373,
"발": 374,
"밝": 375,
"밤": 376,
"밥": 377,
"방": 378,
"밭": 379,
"배": 380,
"백": 381,
"뱀": 382,
"뱁": 383,
"뱃": 384,
"뱄": 385,
"뱅": 386,
"버": 387,
"벅": 388,
"번": 389,
"벌": 390,
"범": 391,
"법": 392,
"벗": 393,
"베": 394,
"벼": 395,
"벽": 396,
"변": 397,
"별": 398,
"볍": 399,
"병": 400,
"볕": 401,
"보": 402,
"복": 403,
"본": 404,
"볼": 405,
"봄": 406,
"봅": 407,
"봉": 408,
"봐": 409,
"봤": 410,
"뵈": 411,
"부": 412,
"북": 413,
"분": 414,
"불": 415,
"붉": 416,
"붐": 417,
"붙": 418,
"비": 419,
"빈": 420,
"빌": 421,
"빙": 422,
"빚": 423,
"빛": 424,
"빠": 425,
"빨": 426,
"빼": 427,
"뺄": 428,
"뺑": 429,
"뺨": 430,
"뻔": 431,
"뻘": 432,
"뼈": 433,
"뼉": 434,
"뽑": 435,
"뾰": 436,
"뿌": 437,
"뿐": 438,
"뿡": 439,
"쁘": 440,
"사": 441,
"산": 442,
"살": 443,
"삶": 444,
"삼": 445,
"삿": 446,
"상": 447,
"새": 448,
"색": 449,
"샘": 450,
"생": 451,
"서": 452,
"석": 453,
"섞": 454,
"선": 455,
"설": 456,
"섬": 457,
"섭": 458,
"섯": 459,
"섰": 460,
"성": 461,
"세": 462,
"센": 463,
"셈": 464,
"셔": 465,
"셨": 466,
"소": 467,
"속": 468,
"손": 469,
"솔": 470,
"솜": 471,
"솟": 472,
"송": 473,
"쇼": 474,
"수": 475,
"숙": 476,
"순": 477,
"술": 478,
"숨": 479,
"숫": 480,
"쉬": 481,
"쉴": 482,
"스": 483,
"슥": 484,
"슨": 485,
"슬": 486,
"슴": 487,
"습": 488,
"슷": 489,
"승": 490,
"시": 491,
"식": 492,
"신": 493,
"실": 494,
"싫": 495,
"심": 496,
"십": 497,
"싯": 498,
"싶": 499,
"싸": 500,
"싹": 501,
"쌀": 502,
"쌍": 503,
"쌓": 504,
"써": 505,
"썩": 506,
"썼": 507,
"쎄": 508,
"쏘": 509,
"쏟": 510,
"쏴": 511,
"쑥": 512,
"쓰": 513,
"쓴": 514,
"쓸": 515,
"씀": 516,
"씌": 517,
"씨": 518,
"씩": 519,
"씬": 520,
"씻": 521,
"아": 522,
"악": 523,
"안": 524,
"앉": 525,
"않": 526,
"알": 527,
"암": 528,
"았": 529,
"앙": 530,
"앞": 531,
"애": 532,
"액": 533,
"앤": 534,
"야": 535,
"약": 536,
"양": 537,
"얕": 538,
"어": 539,
"억": 540,
"언": 541,
"얹": 542,
"얻": 543,
"얼": 544,
"얽": 545,
"엄": 546,
"업": 547,
"없": 548,
"엇": 549,
"었": 550,
"엉": 551,
"엌": 552,
"엎": 553,
"에": 554,
"엔": 555,
"엘": 556,
"여": 557,
"역": 558,
"연": 559,
"열": 560,
"엷": 561,
"염": 562,
"였": 563,
"영": 564,
"옆": 565,
"예": 566,
"옛": 567,
"오": 568,
"옥": 569,
"온": 570,
"올": 571,
"옮": 572,
"옵": 573,
"옷": 574,
"와": 575,
"완": 576,
"왔": 577,
"왕": 578,
"왜": 579,
"외": 580,
"요": 581,
"욕": 582,
"용": 583,
"우": 584,
"욱": 585,
"운": 586,
"울": 587,
"움": 588,
"웃": 589,
"웅": 590,
"워": 591,
"원": 592,
"월": 593,
"웠": 594,
"웬": 595,
"위": 596,
"윗": 597,
"유": 598,
"육": 599,
"윤": 600,
"으": 601,
"은": 602,
"을": 603,
"음": 604,
"읍": 605,
"응": 606,
"의": 607,
"이": 608,
"인": 609,
"일": 610,
"읽": 611,
"잃": 612,
"임": 613,
"입": 614,
"잇": 615,
"있": 616,
"잊": 617,
"자": 618,
"작": 619,
"잔": 620,
"잖": 621,
"잘": 622,
"잠": 623,
"잡": 624,
"장": 625,
"잦": 626,
"재": 627,
"쟁": 628,
"저": 629,
"적": 630,
"전": 631,
"절": 632,
"젊": 633,
"점": 634,
"젓": 635,
"정": 636,
"제": 637,
"젯": 638,
"져": 639,
"졌": 640,
"조": 641,
"족": 642,
"존": 643,
"졸": 644,
"좀": 645,
"좁": 646,
"종": 647,
"좋": 648,
"좌": 649,
"죄": 650,
"죠": 651,
"주": 652,
"죽": 653,
"준": 654,
"줄": 655,
"줌": 656,
"중": 657,
"줘": 658,
"쥐": 659,
"쥔": 660,
"즈": 661,
"즉": 662,
"즐": 663,
"즘": 664,
"증": 665,
"지": 666,
"직": 667,
"진": 668,
"질": 669,
"짐": 670,
"집": 671,
"짓": 672,
"징": 673,
"짖": 674,
"짚": 675,
"짜": 676,
"짝": 677,
"째": 678,
"쩌": 679,
"쩍": 680,
"쪼": 681,
"쪽": 682,
"쫑": 683,
"쫓": 684,
"쫙": 685,
"쭉": 686,
"쯤": 687,
"찌": 688,
"찔": 689,
"찡": 690,
"찢": 691,
"차": 692,
"착": 693,
"찬": 694,
"찮": 695,
"찰": 696,
"참": 697,
"찼": 698,
"창": 699,
"찾": 700,
"채": 701,
"책": 702,
"챘": 703,
"챙": 704,
"처": 705,
"척": 706,
"천": 707,
"철": 708,
"첨": 709,
"첩": 710,
"첫": 711,
"청": 712,
"체": 713,
"쳐": 714,
"쳤": 715,
"초": 716,
"촌": 717,
"촛": 718,
"총": 719,
"최": 720,
"추": 721,
"축": 722,
"춘": 723,
"출": 724,
"춤": 725,
"충": 726,
"춰": 727,
"취": 728,
"츠": 729,
"측": 730,
"츰": 731,
"층": 732,
"치": 733,
"칙": 734,
"친": 735,
"칠": 736,
"침": 737,
"칭": 738,
"카": 739,
"칵": 740,
"칼": 741,
"커": 742,
"컥": 743,
"컨": 744,
"컬": 745,
"케": 746,
"켓": 747,
"켕": 748,
"켜": 749,
"켰": 750,
"코": 751,
"콘": 752,
"쾌": 753,
"쿵": 754,
"퀴": 755,
"크": 756,
"큰": 757,
"클": 758,
"큼": 759,
"키": 760,
"킥": 761,
"킬": 762,
"타": 763,
"탄": 764,
"탐": 765,
"탓": 766,
"탔": 767,
"탕": 768,
"태": 769,
"택": 770,
"터": 771,
"턱": 772,
"털": 773,
"테": 774,
"텐": 775,
"토": 776,
"통": 777,
"퇴": 778,
"투": 779,
"툭": 780,
"툼": 781,
"퉁": 782,
"튀": 783,
"트": 784,
"특": 785,
"튼": 786,
"튿": 787,
"틀": 788,
"틈": 789,
"티": 790,
"파": 791,
"판": 792,
"팔": 793,
"팠": 794,
"팡": 795,
"패": 796,
"팽": 797,
"퍼": 798,
"페": 799,
"편": 800,
"폈": 801,
"평": 802,
"포": 803,
"폭": 804,
"폰": 805,
"폿": 806,
"표": 807,
"푸": 808,
"푹": 809,
"푼": 810,
"풀": 811,
"품": 812,
"풋": 813,
"프": 814,
"픈": 815,
"플": 816,
"픔": 817,
"피": 818,
"픽": 819,
"필": 820,
"핍": 821,
"핏": 822,
"핑": 823,
"하": 824,
"학": 825,
"한": 826,
"할": 827,
"함": 828,
"합": 829,
"항": 830,
"해": 831,
"핵": 832,
"햇": 833,
"했": 834,
"행": 835,
"향": 836,
"허": 837,
"헌": 838,
"헛": 839,
"헤": 840,
"혀": 841,
"혁": 842,
"현": 843,
"혔": 844,
"형": 845,
"혜": 846,
"호": 847,
"혹": 848,
"혼": 849,
"홀": 850,
"홍": 851,
"화": 852,
"확": 853,
"환": 854,
"활": 855,
"황": 856,
"회": 857,
"획": 858,
"횡": 859,
"효": 860,
"후": 861,
"훈": 862,
"훌": 863,
"훔": 864,
"훤": 865,
"훨": 866,
"휘": 867,
"휙": 868,
"휩": 869,
"흉": 870,
"흐": 871,
"흑": 872,
"흔": 873,
"흘": 874,
"흙": 875,
"흡": 876,
"흥": 877,
"흩": 878,
"희": 879,
"흰": 880,
"히": 881,
"힌": 882,
"힐": 883,
"힘": 884
}