tobykim committed on
Commit
f945eb2
·
verified ·
1 Parent(s): 5b220bc

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +14 -0
  2. tokenizer_config.json +112 -0
added_tokens.json CHANGED
@@ -1,4 +1,18 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "<|0.00|>": 50365,
3
  "<|0.02|>": 50366,
4
  "<|0.04|>": 50367,
 
1
  {
2
+ "<breathing>": 51870,
3
+ "<cough>": 51868,
4
+ "<gasp>": 51876,
5
+ "<groan>": 51874,
6
+ "<inhaling>": 51879,
7
+ "<laughter>": 51873,
8
+ "<lip_smack>": 51871,
9
+ "<lipsmack>": 51872,
10
+ "<lipsmacking>": 51878,
11
+ "<sigh>": 51877,
12
+ "<sniffing>": 51867,
13
+ "<snorting>": 51869,
14
+ "<swallowing>": 51875,
15
+ "<throat_clearing>": 51866,
16
  "<|0.00|>": 50365,
17
  "<|0.02|>": 50366,
18
  "<|0.04|>": 50367,
tokenizer_config.json CHANGED
@@ -12872,6 +12872,118 @@
12872
  "rstrip": false,
12873
  "single_word": false,
12874
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12875
  }
12876
  },
12877
  "additional_special_tokens": [
 
12872
  "rstrip": false,
12873
  "single_word": false,
12874
  "special": false
12875
+ },
12876
+ "51866": {
12877
+ "content": "<throat_clearing>",
12878
+ "lstrip": false,
12879
+ "normalized": true,
12880
+ "rstrip": false,
12881
+ "single_word": false,
12882
+ "special": false
12883
+ },
12884
+ "51867": {
12885
+ "content": "<sniffing>",
12886
+ "lstrip": false,
12887
+ "normalized": true,
12888
+ "rstrip": false,
12889
+ "single_word": false,
12890
+ "special": false
12891
+ },
12892
+ "51868": {
12893
+ "content": "<cough>",
12894
+ "lstrip": false,
12895
+ "normalized": true,
12896
+ "rstrip": false,
12897
+ "single_word": false,
12898
+ "special": false
12899
+ },
12900
+ "51869": {
12901
+ "content": "<snorting>",
12902
+ "lstrip": false,
12903
+ "normalized": true,
12904
+ "rstrip": false,
12905
+ "single_word": false,
12906
+ "special": false
12907
+ },
12908
+ "51870": {
12909
+ "content": "<breathing>",
12910
+ "lstrip": false,
12911
+ "normalized": true,
12912
+ "rstrip": false,
12913
+ "single_word": false,
12914
+ "special": false
12915
+ },
12916
+ "51871": {
12917
+ "content": "<lip_smack>",
12918
+ "lstrip": false,
12919
+ "normalized": true,
12920
+ "rstrip": false,
12921
+ "single_word": false,
12922
+ "special": false
12923
+ },
12924
+ "51872": {
12925
+ "content": "<lipsmack>",
12926
+ "lstrip": false,
12927
+ "normalized": true,
12928
+ "rstrip": false,
12929
+ "single_word": false,
12930
+ "special": false
12931
+ },
12932
+ "51873": {
12933
+ "content": "<laughter>",
12934
+ "lstrip": false,
12935
+ "normalized": true,
12936
+ "rstrip": false,
12937
+ "single_word": false,
12938
+ "special": false
12939
+ },
12940
+ "51874": {
12941
+ "content": "<groan>",
12942
+ "lstrip": false,
12943
+ "normalized": true,
12944
+ "rstrip": false,
12945
+ "single_word": false,
12946
+ "special": false
12947
+ },
12948
+ "51875": {
12949
+ "content": "<swallowing>",
12950
+ "lstrip": false,
12951
+ "normalized": true,
12952
+ "rstrip": false,
12953
+ "single_word": false,
12954
+ "special": false
12955
+ },
12956
+ "51876": {
12957
+ "content": "<gasp>",
12958
+ "lstrip": false,
12959
+ "normalized": true,
12960
+ "rstrip": false,
12961
+ "single_word": false,
12962
+ "special": false
12963
+ },
12964
+ "51877": {
12965
+ "content": "<sigh>",
12966
+ "lstrip": false,
12967
+ "normalized": true,
12968
+ "rstrip": false,
12969
+ "single_word": false,
12970
+ "special": false
12971
+ },
12972
+ "51878": {
12973
+ "content": "<lipsmacking>",
12974
+ "lstrip": false,
12975
+ "normalized": true,
12976
+ "rstrip": false,
12977
+ "single_word": false,
12978
+ "special": false
12979
+ },
12980
+ "51879": {
12981
+ "content": "<inhaling>",
12982
+ "lstrip": false,
12983
+ "normalized": true,
12984
+ "rstrip": false,
12985
+ "single_word": false,
12986
+ "special": false
12987
  }
12988
  },
12989
  "additional_special_tokens": [