ayjays132 committed on
Commit
701913d
1 Parent(s): 9f8525a

Upload 4 files

Browse files
Files changed (2) hide show
  1. tokenizer.json +135 -0
  2. tokenizer_config.json +120 -0
tokenizer.json CHANGED
@@ -30,6 +30,15 @@
30
  "normalized": false,
31
  "special": true
32
  },
 
 
 
 
 
 
 
 
 
33
  {
34
  "id": 32000,
35
  "content": "<extra_id_99>",
@@ -947,6 +956,132 @@
947
  "rstrip": false,
948
  "normalized": true,
949
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
950
  }
951
  ],
952
  "normalizer": {
 
30
  "normalized": false,
31
  "special": true
32
  },
33
+ {
34
+ "id": 10452,
35
+ "content": "good",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": true,
40
+ "special": false
41
+ },
42
  {
43
  "id": 32000,
44
  "content": "<extra_id_99>",
 
956
  "rstrip": false,
957
  "normalized": true,
958
  "special": false
959
+ },
960
+ {
961
+ "id": 32102,
962
+ "content": "happy",
963
+ "single_word": false,
964
+ "lstrip": false,
965
+ "rstrip": false,
966
+ "normalized": true,
967
+ "special": false
968
+ },
969
+ {
970
+ "id": 32103,
971
+ "content": "intelligent",
972
+ "single_word": false,
973
+ "lstrip": false,
974
+ "rstrip": false,
975
+ "normalized": true,
976
+ "special": false
977
+ },
978
+ {
979
+ "id": 32104,
980
+ "content": "response",
981
+ "single_word": false,
982
+ "lstrip": false,
983
+ "rstrip": false,
984
+ "normalized": true,
985
+ "special": false
986
+ },
987
+ {
988
+ "id": 32105,
989
+ "content": "environment",
990
+ "single_word": false,
991
+ "lstrip": false,
992
+ "rstrip": false,
993
+ "normalized": true,
994
+ "special": false
995
+ },
996
+ {
997
+ "id": 32106,
998
+ "content": "amazeballs",
999
+ "single_word": false,
1000
+ "lstrip": false,
1001
+ "rstrip": false,
1002
+ "normalized": true,
1003
+ "special": false
1004
+ },
1005
+ {
1006
+ "id": 32107,
1007
+ "content": "cryptocurrency",
1008
+ "single_word": false,
1009
+ "lstrip": false,
1010
+ "rstrip": false,
1011
+ "normalized": true,
1012
+ "special": false
1013
+ },
1014
+ {
1015
+ "id": 32108,
1016
+ "content": "webinar",
1017
+ "single_word": false,
1018
+ "lstrip": false,
1019
+ "rstrip": false,
1020
+ "normalized": true,
1021
+ "special": false
1022
+ },
1023
+ {
1024
+ "id": 32109,
1025
+ "content": "vlog",
1026
+ "single_word": false,
1027
+ "lstrip": false,
1028
+ "rstrip": false,
1029
+ "normalized": true,
1030
+ "special": false
1031
+ },
1032
+ {
1033
+ "id": 32110,
1034
+ "content": "upcycle",
1035
+ "single_word": false,
1036
+ "lstrip": false,
1037
+ "rstrip": false,
1038
+ "normalized": true,
1039
+ "special": false
1040
+ },
1041
+ {
1042
+ "id": 32111,
1043
+ "content": "photobomb",
1044
+ "single_word": false,
1045
+ "lstrip": false,
1046
+ "rstrip": false,
1047
+ "normalized": true,
1048
+ "special": false
1049
+ },
1050
+ {
1051
+ "id": 32112,
1052
+ "content": "facepalm",
1053
+ "single_word": false,
1054
+ "lstrip": false,
1055
+ "rstrip": false,
1056
+ "normalized": true,
1057
+ "special": false
1058
+ },
1059
+ {
1060
+ "id": 32113,
1061
+ "content": "crowdfunding",
1062
+ "single_word": false,
1063
+ "lstrip": false,
1064
+ "rstrip": false,
1065
+ "normalized": true,
1066
+ "special": false
1067
+ },
1068
+ {
1069
+ "id": 32114,
1070
+ "content": "bromance",
1071
+ "single_word": false,
1072
+ "lstrip": false,
1073
+ "rstrip": false,
1074
+ "normalized": true,
1075
+ "special": false
1076
+ },
1077
+ {
1078
+ "id": 32115,
1079
+ "content": "hangry",
1080
+ "single_word": false,
1081
+ "lstrip": false,
1082
+ "rstrip": false,
1083
+ "normalized": true,
1084
+ "special": false
1085
  }
1086
  ],
1087
  "normalizer": {
tokenizer_config.json CHANGED
@@ -24,6 +24,14 @@
24
  "single_word": false,
25
  "special": true
26
  },
 
 
 
 
 
 
 
 
27
  "32000": {
28
  "content": "<extra_id_99>",
29
  "lstrip": false,
@@ -839,6 +847,118 @@
839
  "rstrip": false,
840
  "single_word": false,
841
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
842
  }
843
  },
844
  "additional_special_tokens": [
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "10452": {
28
+ "content": "good",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
  "32000": {
36
  "content": "<extra_id_99>",
37
  "lstrip": false,
 
847
  "rstrip": false,
848
  "single_word": false,
849
  "special": false
850
+ },
851
+ "32102": {
852
+ "content": "happy",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ },
859
+ "32103": {
860
+ "content": "intelligent",
861
+ "lstrip": false,
862
+ "normalized": true,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": false
866
+ },
867
+ "32104": {
868
+ "content": "response",
869
+ "lstrip": false,
870
+ "normalized": true,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": false
874
+ },
875
+ "32105": {
876
+ "content": "environment",
877
+ "lstrip": false,
878
+ "normalized": true,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": false
882
+ },
883
+ "32106": {
884
+ "content": "amazeballs",
885
+ "lstrip": false,
886
+ "normalized": true,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": false
890
+ },
891
+ "32107": {
892
+ "content": "cryptocurrency",
893
+ "lstrip": false,
894
+ "normalized": true,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": false
898
+ },
899
+ "32108": {
900
+ "content": "webinar",
901
+ "lstrip": false,
902
+ "normalized": true,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": false
906
+ },
907
+ "32109": {
908
+ "content": "vlog",
909
+ "lstrip": false,
910
+ "normalized": true,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": false
914
+ },
915
+ "32110": {
916
+ "content": "upcycle",
917
+ "lstrip": false,
918
+ "normalized": true,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": false
922
+ },
923
+ "32111": {
924
+ "content": "photobomb",
925
+ "lstrip": false,
926
+ "normalized": true,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": false
930
+ },
931
+ "32112": {
932
+ "content": "facepalm",
933
+ "lstrip": false,
934
+ "normalized": true,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": false
938
+ },
939
+ "32113": {
940
+ "content": "crowdfunding",
941
+ "lstrip": false,
942
+ "normalized": true,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": false
946
+ },
947
+ "32114": {
948
+ "content": "bromance",
949
+ "lstrip": false,
950
+ "normalized": true,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": false
954
+ },
955
+ "32115": {
956
+ "content": "hangry",
957
+ "lstrip": false,
958
+ "normalized": true,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": false
962
  }
963
  },
964
  "additional_special_tokens": [