Upload tokenizer
Browse files- added_tokens.json +14 -0
- tokenizer_config.json +112 -0
added_tokens.json
CHANGED
@@ -1,4 +1,18 @@
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"<|0.00|>": 50365,
|
3 |
"<|0.02|>": 50366,
|
4 |
"<|0.04|>": 50367,
|
|
|
1 |
{
|
2 |
+
"<breathing>": 51870,
|
3 |
+
"<cough>": 51868,
|
4 |
+
"<gasp>": 51876,
|
5 |
+
"<groan>": 51874,
|
6 |
+
"<inhaling>": 51879,
|
7 |
+
"<laughter>": 51873,
|
8 |
+
"<lip_smack>": 51871,
|
9 |
+
"<lipsmack>": 51872,
|
10 |
+
"<lipsmacking>": 51878,
|
11 |
+
"<sigh>": 51877,
|
12 |
+
"<sniffing>": 51867,
|
13 |
+
"<snorting>": 51869,
|
14 |
+
"<swallowing>": 51875,
|
15 |
+
"<throat_clearing>": 51866,
|
16 |
"<|0.00|>": 50365,
|
17 |
"<|0.02|>": 50366,
|
18 |
"<|0.04|>": 50367,
|
tokenizer_config.json
CHANGED
@@ -12872,6 +12872,118 @@
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12875 |
}
|
12876 |
},
|
12877 |
"additional_special_tokens": [
|
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
12875 |
+
},
|
12876 |
+
"51866": {
|
12877 |
+
"content": "<throat_clearing>",
|
12878 |
+
"lstrip": false,
|
12879 |
+
"normalized": true,
|
12880 |
+
"rstrip": false,
|
12881 |
+
"single_word": false,
|
12882 |
+
"special": false
|
12883 |
+
},
|
12884 |
+
"51867": {
|
12885 |
+
"content": "<sniffing>",
|
12886 |
+
"lstrip": false,
|
12887 |
+
"normalized": true,
|
12888 |
+
"rstrip": false,
|
12889 |
+
"single_word": false,
|
12890 |
+
"special": false
|
12891 |
+
},
|
12892 |
+
"51868": {
|
12893 |
+
"content": "<cough>",
|
12894 |
+
"lstrip": false,
|
12895 |
+
"normalized": true,
|
12896 |
+
"rstrip": false,
|
12897 |
+
"single_word": false,
|
12898 |
+
"special": false
|
12899 |
+
},
|
12900 |
+
"51869": {
|
12901 |
+
"content": "<snorting>",
|
12902 |
+
"lstrip": false,
|
12903 |
+
"normalized": true,
|
12904 |
+
"rstrip": false,
|
12905 |
+
"single_word": false,
|
12906 |
+
"special": false
|
12907 |
+
},
|
12908 |
+
"51870": {
|
12909 |
+
"content": "<breathing>",
|
12910 |
+
"lstrip": false,
|
12911 |
+
"normalized": true,
|
12912 |
+
"rstrip": false,
|
12913 |
+
"single_word": false,
|
12914 |
+
"special": false
|
12915 |
+
},
|
12916 |
+
"51871": {
|
12917 |
+
"content": "<lip_smack>",
|
12918 |
+
"lstrip": false,
|
12919 |
+
"normalized": true,
|
12920 |
+
"rstrip": false,
|
12921 |
+
"single_word": false,
|
12922 |
+
"special": false
|
12923 |
+
},
|
12924 |
+
"51872": {
|
12925 |
+
"content": "<lipsmack>",
|
12926 |
+
"lstrip": false,
|
12927 |
+
"normalized": true,
|
12928 |
+
"rstrip": false,
|
12929 |
+
"single_word": false,
|
12930 |
+
"special": false
|
12931 |
+
},
|
12932 |
+
"51873": {
|
12933 |
+
"content": "<laughter>",
|
12934 |
+
"lstrip": false,
|
12935 |
+
"normalized": true,
|
12936 |
+
"rstrip": false,
|
12937 |
+
"single_word": false,
|
12938 |
+
"special": false
|
12939 |
+
},
|
12940 |
+
"51874": {
|
12941 |
+
"content": "<groan>",
|
12942 |
+
"lstrip": false,
|
12943 |
+
"normalized": true,
|
12944 |
+
"rstrip": false,
|
12945 |
+
"single_word": false,
|
12946 |
+
"special": false
|
12947 |
+
},
|
12948 |
+
"51875": {
|
12949 |
+
"content": "<swallowing>",
|
12950 |
+
"lstrip": false,
|
12951 |
+
"normalized": true,
|
12952 |
+
"rstrip": false,
|
12953 |
+
"single_word": false,
|
12954 |
+
"special": false
|
12955 |
+
},
|
12956 |
+
"51876": {
|
12957 |
+
"content": "<gasp>",
|
12958 |
+
"lstrip": false,
|
12959 |
+
"normalized": true,
|
12960 |
+
"rstrip": false,
|
12961 |
+
"single_word": false,
|
12962 |
+
"special": false
|
12963 |
+
},
|
12964 |
+
"51877": {
|
12965 |
+
"content": "<sigh>",
|
12966 |
+
"lstrip": false,
|
12967 |
+
"normalized": true,
|
12968 |
+
"rstrip": false,
|
12969 |
+
"single_word": false,
|
12970 |
+
"special": false
|
12971 |
+
},
|
12972 |
+
"51878": {
|
12973 |
+
"content": "<lipsmacking>",
|
12974 |
+
"lstrip": false,
|
12975 |
+
"normalized": true,
|
12976 |
+
"rstrip": false,
|
12977 |
+
"single_word": false,
|
12978 |
+
"special": false
|
12979 |
+
},
|
12980 |
+
"51879": {
|
12981 |
+
"content": "<inhaling>",
|
12982 |
+
"lstrip": false,
|
12983 |
+
"normalized": true,
|
12984 |
+
"rstrip": false,
|
12985 |
+
"single_word": false,
|
12986 |
+
"special": false
|
12987 |
}
|
12988 |
},
|
12989 |
"additional_special_tokens": [
|