krishnateja committed on
Commit
3da88f3
·
1 Parent(s): afce653

Modified files after adding the language model (LM)

Browse files
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": [" ", "\u0c01", "\u0c02", "\u0c03", "\u0c05", "\u0c06", "\u0c07", "\u0c08", "\u0c09", "\u0c0a", "\u0c0b", "\u0c0e", "\u0c0f", "\u0c10", "\u0c12", "\u0c13", "\u0c14", "\u0c15", "\u0c16", "\u0c17", "\u0c18", "\u0c19", "\u0c1a", "\u0c1b", "\u0c1c", "\u0c1e", "\u0c1f", "\u0c20", "\u0c21", "\u0c22", "\u0c23", "\u0c24", "\u0c25", "\u0c26", "\u0c27", "\u0c28", "\u0c2a", "\u0c2b", "\u0c2c", "\u0c2d", "\u0c2e", "\u0c2f", "\u0c30", "\u0c31", "\u0c32", "\u0c33", "\u0c35", "\u0c36", "\u0c37", "\u0c38", "\u0c39", "\u0c3e", "\u0c3f", "\u0c40", "\u0c41", "\u0c42", "\u0c43", "\u0c46", "\u0c47", "\u0c48", "\u0c4a", "\u0c4b", "\u0c4c", "\u0c4d", "\u2019", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
preprocessor_config.json CHANGED
@@ -4,6 +4,7 @@
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
 
7
  "return_attention_mask": true,
8
  "sampling_rate": 16000
9
  }
 
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
  "return_attention_mask": true,
9
  "sampling_rate": 16000
10
  }
special_tokens_map.json CHANGED
@@ -1,5 +1,19 @@
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "content": "<s>",
5
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
  {
18
  "content": "<s>",
19
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -2,8 +2,9 @@
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
- "name_or_path": "./",
6
  "pad_token": "[PAD]",
 
7
  "replace_word_delimiter_char": " ",
8
  "special_tokens_map_file": null,
9
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
 
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
+ "name_or_path": "krishnateja/wav2vec2-telugu_150",
6
  "pad_token": "[PAD]",
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
  "replace_word_delimiter_char": " ",
9
  "special_tokens_map_file": null,
10
  "tokenizer_class": "Wav2Vec2CTCTokenizer",