Kkonjeong committed on
Commit
c6b1cd7
•
1 Parent(s): b9c0d51

Upload processor

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -2
  2. tokenizer_config.json +5 -5
  3. vocab.json +54 -36
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 37,
3
- "<s>": 36
4
  }
 
1
  {
2
+ "</s>": 55,
3
+ "<s>": 54
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "34": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "35": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "36": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "37": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
@@ -44,5 +44,5 @@
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
  "unk_token": "[UNK]",
47
- "word_delimiter_token": "|"
48
  }
 
1
  {
2
  "added_tokens_decoder": {
3
+ "52": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "53": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "54": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "55": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
  "unk_token": "[UNK]",
47
+ "word_delimiter_token": " "
48
  }
vocab.json CHANGED
@@ -1,38 +1,56 @@
1
  {
2
- " ": 0,
3
- "[PAD]": 35,
4
- "[UNK]": 34,
5
- "ㄱ": 1,
6
- "ㄲ": 2,
7
- "ㄴ": 3,
8
- "ㄷ": 4,
9
- "ㄸ": 5,
10
- "ㄹ": 6,
11
- "ㅁ": 7,
12
- "ㅂ": 8,
13
- "ㅃ": 9,
14
- "ㅅ": 10,
15
- "ㅆ": 11,
16
- "ㅇ": 12,
17
- "ㅈ": 13,
18
- "ㅉ": 14,
19
- "ㅊ": 15,
20
- "ㅋ": 16,
21
- "ㅌ": 17,
22
- "ㅍ": 18,
23
- "ㅎ": 19,
24
- "ㅏ": 20,
25
- "ㅐ": 21,
26
- "ㅑ": 22,
27
- "ㅒ": 23,
28
- "ㅓ": 24,
29
- "ㅔ": 25,
30
- "ㅕ": 26,
31
- "ㅖ": 27,
32
- "ㅗ": 28,
33
- "ㅛ": 29,
34
- "ㅜ": 30,
35
- "ㅠ": 31,
36
- "ㅡ": 32,
37
- "ㅣ": 33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
 
1
  {
2
+ " ": 51,
3
+ "[PAD]": 53,
4
+ "[UNK]": 52,
5
+ "ㄱ": 0,
6
+ "ㄲ": 1,
7
+ "ㄳ": 40,
8
+ "ㄴ": 2,
9
+ "ㄵ": 41,
10
+ "ㄶ": 42,
11
+ "ㄷ": 3,
12
+ "ㄸ": 4,
13
+ "ㄹ": 5,
14
+ "ㄺ": 43,
15
+ "ㄻ": 44,
16
+ "ㄼ": 45,
17
+ "ㄽ": 46,
18
+ "ㄾ": 47,
19
+ "ㄿ": 48,
20
+ "ㅀ": 49,
21
+ "ㅁ": 6,
22
+ "ㅂ": 7,
23
+ "ㅃ": 8,
24
+ "ㅄ": 50,
25
+ "ㅅ": 9,
26
+ "ㅆ": 10,
27
+ "ㅇ": 11,
28
+ "ㅈ": 12,
29
+ "ㅉ": 13,
30
+ "ㅊ": 14,
31
+ "ㅋ": 15,
32
+ "ㅌ": 16,
33
+ "ㅍ": 17,
34
+ "ㅎ": 18,
35
+ "ㅏ": 19,
36
+ "ㅐ": 20,
37
+ "ㅑ": 21,
38
+ "ㅒ": 22,
39
+ "ㅓ": 23,
40
+ "ㅔ": 24,
41
+ "ㅕ": 25,
42
+ "ㅖ": 26,
43
+ "ㅗ": 27,
44
+ "ㅘ": 28,
45
+ "ㅙ": 29,
46
+ "ㅚ": 30,
47
+ "ㅛ": 31,
48
+ "ㅜ": 32,
49
+ "ㅝ": 33,
50
+ "ㅞ": 34,
51
+ "ㅟ": 35,
52
+ "ㅠ": 36,
53
+ "ㅡ": 37,
54
+ "ㅢ": 38,
55
+ "ㅣ": 39
56
  }