Upload 4 files
Browse files- tokenizer.json +135 -0
- tokenizer_config.json +120 -0
tokenizer.json
CHANGED
@@ -30,6 +30,15 @@
|
|
30 |
"normalized": false,
|
31 |
"special": true
|
32 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
{
|
34 |
"id": 32000,
|
35 |
"content": "<extra_id_99>",
|
@@ -947,6 +956,132 @@
|
|
947 |
"rstrip": false,
|
948 |
"normalized": true,
|
949 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
950 |
}
|
951 |
],
|
952 |
"normalizer": {
|
|
|
30 |
"normalized": false,
|
31 |
"special": true
|
32 |
},
|
33 |
+
{
|
34 |
+
"id": 10452,
|
35 |
+
"content": "good",
|
36 |
+
"single_word": false,
|
37 |
+
"lstrip": false,
|
38 |
+
"rstrip": false,
|
39 |
+
"normalized": true,
|
40 |
+
"special": false
|
41 |
+
},
|
42 |
{
|
43 |
"id": 32000,
|
44 |
"content": "<extra_id_99>",
|
|
|
956 |
"rstrip": false,
|
957 |
"normalized": true,
|
958 |
"special": false
|
959 |
+
},
|
960 |
+
{
|
961 |
+
"id": 32102,
|
962 |
+
"content": "happy",
|
963 |
+
"single_word": false,
|
964 |
+
"lstrip": false,
|
965 |
+
"rstrip": false,
|
966 |
+
"normalized": true,
|
967 |
+
"special": false
|
968 |
+
},
|
969 |
+
{
|
970 |
+
"id": 32103,
|
971 |
+
"content": "intelligent",
|
972 |
+
"single_word": false,
|
973 |
+
"lstrip": false,
|
974 |
+
"rstrip": false,
|
975 |
+
"normalized": true,
|
976 |
+
"special": false
|
977 |
+
},
|
978 |
+
{
|
979 |
+
"id": 32104,
|
980 |
+
"content": "response",
|
981 |
+
"single_word": false,
|
982 |
+
"lstrip": false,
|
983 |
+
"rstrip": false,
|
984 |
+
"normalized": true,
|
985 |
+
"special": false
|
986 |
+
},
|
987 |
+
{
|
988 |
+
"id": 32105,
|
989 |
+
"content": "environment",
|
990 |
+
"single_word": false,
|
991 |
+
"lstrip": false,
|
992 |
+
"rstrip": false,
|
993 |
+
"normalized": true,
|
994 |
+
"special": false
|
995 |
+
},
|
996 |
+
{
|
997 |
+
"id": 32106,
|
998 |
+
"content": "amazeballs",
|
999 |
+
"single_word": false,
|
1000 |
+
"lstrip": false,
|
1001 |
+
"rstrip": false,
|
1002 |
+
"normalized": true,
|
1003 |
+
"special": false
|
1004 |
+
},
|
1005 |
+
{
|
1006 |
+
"id": 32107,
|
1007 |
+
"content": "cryptocurrency",
|
1008 |
+
"single_word": false,
|
1009 |
+
"lstrip": false,
|
1010 |
+
"rstrip": false,
|
1011 |
+
"normalized": true,
|
1012 |
+
"special": false
|
1013 |
+
},
|
1014 |
+
{
|
1015 |
+
"id": 32108,
|
1016 |
+
"content": "webinar",
|
1017 |
+
"single_word": false,
|
1018 |
+
"lstrip": false,
|
1019 |
+
"rstrip": false,
|
1020 |
+
"normalized": true,
|
1021 |
+
"special": false
|
1022 |
+
},
|
1023 |
+
{
|
1024 |
+
"id": 32109,
|
1025 |
+
"content": "vlog",
|
1026 |
+
"single_word": false,
|
1027 |
+
"lstrip": false,
|
1028 |
+
"rstrip": false,
|
1029 |
+
"normalized": true,
|
1030 |
+
"special": false
|
1031 |
+
},
|
1032 |
+
{
|
1033 |
+
"id": 32110,
|
1034 |
+
"content": "upcycle",
|
1035 |
+
"single_word": false,
|
1036 |
+
"lstrip": false,
|
1037 |
+
"rstrip": false,
|
1038 |
+
"normalized": true,
|
1039 |
+
"special": false
|
1040 |
+
},
|
1041 |
+
{
|
1042 |
+
"id": 32111,
|
1043 |
+
"content": "photobomb",
|
1044 |
+
"single_word": false,
|
1045 |
+
"lstrip": false,
|
1046 |
+
"rstrip": false,
|
1047 |
+
"normalized": true,
|
1048 |
+
"special": false
|
1049 |
+
},
|
1050 |
+
{
|
1051 |
+
"id": 32112,
|
1052 |
+
"content": "facepalm",
|
1053 |
+
"single_word": false,
|
1054 |
+
"lstrip": false,
|
1055 |
+
"rstrip": false,
|
1056 |
+
"normalized": true,
|
1057 |
+
"special": false
|
1058 |
+
},
|
1059 |
+
{
|
1060 |
+
"id": 32113,
|
1061 |
+
"content": "crowdfunding",
|
1062 |
+
"single_word": false,
|
1063 |
+
"lstrip": false,
|
1064 |
+
"rstrip": false,
|
1065 |
+
"normalized": true,
|
1066 |
+
"special": false
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"id": 32114,
|
1070 |
+
"content": "bromance",
|
1071 |
+
"single_word": false,
|
1072 |
+
"lstrip": false,
|
1073 |
+
"rstrip": false,
|
1074 |
+
"normalized": true,
|
1075 |
+
"special": false
|
1076 |
+
},
|
1077 |
+
{
|
1078 |
+
"id": 32115,
|
1079 |
+
"content": "hangry",
|
1080 |
+
"single_word": false,
|
1081 |
+
"lstrip": false,
|
1082 |
+
"rstrip": false,
|
1083 |
+
"normalized": true,
|
1084 |
+
"special": false
|
1085 |
}
|
1086 |
],
|
1087 |
"normalizer": {
|
tokenizer_config.json
CHANGED
@@ -24,6 +24,14 @@
|
|
24 |
"single_word": false,
|
25 |
"special": true
|
26 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
"32000": {
|
28 |
"content": "<extra_id_99>",
|
29 |
"lstrip": false,
|
@@ -839,6 +847,118 @@
|
|
839 |
"rstrip": false,
|
840 |
"single_word": false,
|
841 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
842 |
}
|
843 |
},
|
844 |
"additional_special_tokens": [
|
|
|
24 |
"single_word": false,
|
25 |
"special": true
|
26 |
},
|
27 |
+
"10452": {
|
28 |
+
"content": "good",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": false
|
34 |
+
},
|
35 |
"32000": {
|
36 |
"content": "<extra_id_99>",
|
37 |
"lstrip": false,
|
|
|
847 |
"rstrip": false,
|
848 |
"single_word": false,
|
849 |
"special": false
|
850 |
+
},
|
851 |
+
"32102": {
|
852 |
+
"content": "happy",
|
853 |
+
"lstrip": false,
|
854 |
+
"normalized": true,
|
855 |
+
"rstrip": false,
|
856 |
+
"single_word": false,
|
857 |
+
"special": false
|
858 |
+
},
|
859 |
+
"32103": {
|
860 |
+
"content": "intelligent",
|
861 |
+
"lstrip": false,
|
862 |
+
"normalized": true,
|
863 |
+
"rstrip": false,
|
864 |
+
"single_word": false,
|
865 |
+
"special": false
|
866 |
+
},
|
867 |
+
"32104": {
|
868 |
+
"content": "response",
|
869 |
+
"lstrip": false,
|
870 |
+
"normalized": true,
|
871 |
+
"rstrip": false,
|
872 |
+
"single_word": false,
|
873 |
+
"special": false
|
874 |
+
},
|
875 |
+
"32105": {
|
876 |
+
"content": "environment",
|
877 |
+
"lstrip": false,
|
878 |
+
"normalized": true,
|
879 |
+
"rstrip": false,
|
880 |
+
"single_word": false,
|
881 |
+
"special": false
|
882 |
+
},
|
883 |
+
"32106": {
|
884 |
+
"content": "amazeballs",
|
885 |
+
"lstrip": false,
|
886 |
+
"normalized": true,
|
887 |
+
"rstrip": false,
|
888 |
+
"single_word": false,
|
889 |
+
"special": false
|
890 |
+
},
|
891 |
+
"32107": {
|
892 |
+
"content": "cryptocurrency",
|
893 |
+
"lstrip": false,
|
894 |
+
"normalized": true,
|
895 |
+
"rstrip": false,
|
896 |
+
"single_word": false,
|
897 |
+
"special": false
|
898 |
+
},
|
899 |
+
"32108": {
|
900 |
+
"content": "webinar",
|
901 |
+
"lstrip": false,
|
902 |
+
"normalized": true,
|
903 |
+
"rstrip": false,
|
904 |
+
"single_word": false,
|
905 |
+
"special": false
|
906 |
+
},
|
907 |
+
"32109": {
|
908 |
+
"content": "vlog",
|
909 |
+
"lstrip": false,
|
910 |
+
"normalized": true,
|
911 |
+
"rstrip": false,
|
912 |
+
"single_word": false,
|
913 |
+
"special": false
|
914 |
+
},
|
915 |
+
"32110": {
|
916 |
+
"content": "upcycle",
|
917 |
+
"lstrip": false,
|
918 |
+
"normalized": true,
|
919 |
+
"rstrip": false,
|
920 |
+
"single_word": false,
|
921 |
+
"special": false
|
922 |
+
},
|
923 |
+
"32111": {
|
924 |
+
"content": "photobomb",
|
925 |
+
"lstrip": false,
|
926 |
+
"normalized": true,
|
927 |
+
"rstrip": false,
|
928 |
+
"single_word": false,
|
929 |
+
"special": false
|
930 |
+
},
|
931 |
+
"32112": {
|
932 |
+
"content": "facepalm",
|
933 |
+
"lstrip": false,
|
934 |
+
"normalized": true,
|
935 |
+
"rstrip": false,
|
936 |
+
"single_word": false,
|
937 |
+
"special": false
|
938 |
+
},
|
939 |
+
"32113": {
|
940 |
+
"content": "crowdfunding",
|
941 |
+
"lstrip": false,
|
942 |
+
"normalized": true,
|
943 |
+
"rstrip": false,
|
944 |
+
"single_word": false,
|
945 |
+
"special": false
|
946 |
+
},
|
947 |
+
"32114": {
|
948 |
+
"content": "bromance",
|
949 |
+
"lstrip": false,
|
950 |
+
"normalized": true,
|
951 |
+
"rstrip": false,
|
952 |
+
"single_word": false,
|
953 |
+
"special": false
|
954 |
+
},
|
955 |
+
"32115": {
|
956 |
+
"content": "hangry",
|
957 |
+
"lstrip": false,
|
958 |
+
"normalized": true,
|
959 |
+
"rstrip": false,
|
960 |
+
"single_word": false,
|
961 |
+
"special": false
|
962 |
}
|
963 |
},
|
964 |
"additional_special_tokens": [
|