Upload processor
Browse files- added_tokens.json +0 -14
- preprocessor_config.json +14 -0
- tokenizer_config.json +0 -112
added_tokens.json
CHANGED
@@ -1,18 +1,4 @@
|
|
1 |
{
|
2 |
-
"<breathing>": 51870,
|
3 |
-
"<cough>": 51868,
|
4 |
-
"<gasp>": 51876,
|
5 |
-
"<groan>": 51874,
|
6 |
-
"<inhaling>": 51879,
|
7 |
-
"<laughter>": 51873,
|
8 |
-
"<lip_smack>": 51871,
|
9 |
-
"<lipsmack>": 51872,
|
10 |
-
"<lipsmacking>": 51878,
|
11 |
-
"<sigh>": 51877,
|
12 |
-
"<sniffing>": 51867,
|
13 |
-
"<snorting>": 51869,
|
14 |
-
"<swallowing>": 51875,
|
15 |
-
"<throat_clearing>": 51866,
|
16 |
"<|0.00|>": 50365,
|
17 |
"<|0.02|>": 50366,
|
18 |
"<|0.04|>": 50367,
|
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"<|0.00|>": 50365,
|
3 |
"<|0.02|>": 50366,
|
4 |
"<|0.04|>": 50367,
|
preprocessor_config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"chunk_length": 30,
|
3 |
+
"feature_extractor_type": "WhisperFeatureExtractor",
|
4 |
+
"feature_size": 128,
|
5 |
+
"hop_length": 160,
|
6 |
+
"n_fft": 400,
|
7 |
+
"n_samples": 480000,
|
8 |
+
"nb_max_frames": 3000,
|
9 |
+
"padding_side": "right",
|
10 |
+
"padding_value": 0.0,
|
11 |
+
"processor_class": "WhisperProcessor",
|
12 |
+
"return_attention_mask": false,
|
13 |
+
"sampling_rate": 16000
|
14 |
+
}
|
tokenizer_config.json
CHANGED
@@ -12872,118 +12872,6 @@
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
12875 |
-
},
|
12876 |
-
"51866": {
|
12877 |
-
"content": "<throat_clearing>",
|
12878 |
-
"lstrip": false,
|
12879 |
-
"normalized": true,
|
12880 |
-
"rstrip": false,
|
12881 |
-
"single_word": false,
|
12882 |
-
"special": false
|
12883 |
-
},
|
12884 |
-
"51867": {
|
12885 |
-
"content": "<sniffing>",
|
12886 |
-
"lstrip": false,
|
12887 |
-
"normalized": true,
|
12888 |
-
"rstrip": false,
|
12889 |
-
"single_word": false,
|
12890 |
-
"special": false
|
12891 |
-
},
|
12892 |
-
"51868": {
|
12893 |
-
"content": "<cough>",
|
12894 |
-
"lstrip": false,
|
12895 |
-
"normalized": true,
|
12896 |
-
"rstrip": false,
|
12897 |
-
"single_word": false,
|
12898 |
-
"special": false
|
12899 |
-
},
|
12900 |
-
"51869": {
|
12901 |
-
"content": "<snorting>",
|
12902 |
-
"lstrip": false,
|
12903 |
-
"normalized": true,
|
12904 |
-
"rstrip": false,
|
12905 |
-
"single_word": false,
|
12906 |
-
"special": false
|
12907 |
-
},
|
12908 |
-
"51870": {
|
12909 |
-
"content": "<breathing>",
|
12910 |
-
"lstrip": false,
|
12911 |
-
"normalized": true,
|
12912 |
-
"rstrip": false,
|
12913 |
-
"single_word": false,
|
12914 |
-
"special": false
|
12915 |
-
},
|
12916 |
-
"51871": {
|
12917 |
-
"content": "<lip_smack>",
|
12918 |
-
"lstrip": false,
|
12919 |
-
"normalized": true,
|
12920 |
-
"rstrip": false,
|
12921 |
-
"single_word": false,
|
12922 |
-
"special": false
|
12923 |
-
},
|
12924 |
-
"51872": {
|
12925 |
-
"content": "<lipsmack>",
|
12926 |
-
"lstrip": false,
|
12927 |
-
"normalized": true,
|
12928 |
-
"rstrip": false,
|
12929 |
-
"single_word": false,
|
12930 |
-
"special": false
|
12931 |
-
},
|
12932 |
-
"51873": {
|
12933 |
-
"content": "<laughter>",
|
12934 |
-
"lstrip": false,
|
12935 |
-
"normalized": true,
|
12936 |
-
"rstrip": false,
|
12937 |
-
"single_word": false,
|
12938 |
-
"special": false
|
12939 |
-
},
|
12940 |
-
"51874": {
|
12941 |
-
"content": "<groan>",
|
12942 |
-
"lstrip": false,
|
12943 |
-
"normalized": true,
|
12944 |
-
"rstrip": false,
|
12945 |
-
"single_word": false,
|
12946 |
-
"special": false
|
12947 |
-
},
|
12948 |
-
"51875": {
|
12949 |
-
"content": "<swallowing>",
|
12950 |
-
"lstrip": false,
|
12951 |
-
"normalized": true,
|
12952 |
-
"rstrip": false,
|
12953 |
-
"single_word": false,
|
12954 |
-
"special": false
|
12955 |
-
},
|
12956 |
-
"51876": {
|
12957 |
-
"content": "<gasp>",
|
12958 |
-
"lstrip": false,
|
12959 |
-
"normalized": true,
|
12960 |
-
"rstrip": false,
|
12961 |
-
"single_word": false,
|
12962 |
-
"special": false
|
12963 |
-
},
|
12964 |
-
"51877": {
|
12965 |
-
"content": "<sigh>",
|
12966 |
-
"lstrip": false,
|
12967 |
-
"normalized": true,
|
12968 |
-
"rstrip": false,
|
12969 |
-
"single_word": false,
|
12970 |
-
"special": false
|
12971 |
-
},
|
12972 |
-
"51878": {
|
12973 |
-
"content": "<lipsmacking>",
|
12974 |
-
"lstrip": false,
|
12975 |
-
"normalized": true,
|
12976 |
-
"rstrip": false,
|
12977 |
-
"single_word": false,
|
12978 |
-
"special": false
|
12979 |
-
},
|
12980 |
-
"51879": {
|
12981 |
-
"content": "<inhaling>",
|
12982 |
-
"lstrip": false,
|
12983 |
-
"normalized": true,
|
12984 |
-
"rstrip": false,
|
12985 |
-
"single_word": false,
|
12986 |
-
"special": false
|
12987 |
}
|
12988 |
},
|
12989 |
"additional_special_tokens": [
|
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12875 |
}
|
12876 |
},
|
12877 |
"additional_special_tokens": [
|