Modified files after adding the language model (LM)

Browse files

Files changed (4)

alphabet.json +1 -0
preprocessor_config.json +1 -0
special_tokens_map.json +14 -0
tokenizer_config.json +2 -1

alphabet.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"labels": [" ", "\u0c01", "\u0c02", "\u0c03", "\u0c05", "\u0c06", "\u0c07", "\u0c08", "\u0c09", "\u0c0a", "\u0c0b", "\u0c0e", "\u0c0f", "\u0c10", "\u0c12", "\u0c13", "\u0c14", "\u0c15", "\u0c16", "\u0c17", "\u0c18", "\u0c19", "\u0c1a", "\u0c1b", "\u0c1c", "\u0c1e", "\u0c1f", "\u0c20", "\u0c21", "\u0c22", "\u0c23", "\u0c24", "\u0c25", "\u0c26", "\u0c27", "\u0c28", "\u0c2a", "\u0c2b", "\u0c2c", "\u0c2d", "\u0c2e", "\u0c2f", "\u0c30", "\u0c31", "\u0c32", "\u0c33", "\u0c35", "\u0c36", "\u0c37", "\u0c38", "\u0c39", "\u0c3e", "\u0c3f", "\u0c40", "\u0c41", "\u0c42", "\u0c43", "\u0c46", "\u0c47", "\u0c48", "\u0c4a", "\u0c4b", "\u0c4c", "\u0c4d", "\u2019", "\u2047", "", "<s>", "</s>"], "is_bpe": false}

preprocessor_config.json CHANGED Viewed

@@ -4,6 +4,7 @@
   "feature_size": 1,
   "padding_side": "right",
   "padding_value": 0.0,
   "return_attention_mask": true,
   "sampling_rate": 16000
 }

   "feature_size": 1,
   "padding_side": "right",
   "padding_value": 0.0,
+  "processor_class": "Wav2Vec2ProcessorWithLM",
   "return_attention_mask": true,
   "sampling_rate": 16000
 }

special_tokens_map.json CHANGED Viewed

@@ -1,5 +1,19 @@
 {
   "additional_special_tokens": [
     {
       "content": "<s>",
       "lstrip": false,

 {
   "additional_special_tokens": [
+    {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
     {
       "content": "<s>",
       "lstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -2,8 +2,9 @@
   "bos_token": "<s>",
   "do_lower_case": false,
   "eos_token": "</s>",
-  "name_or_path": "./",
   "pad_token": "[PAD]",
   "replace_word_delimiter_char": " ",
   "special_tokens_map_file": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",

   "bos_token": "<s>",
   "do_lower_case": false,
   "eos_token": "</s>",
+  "name_or_path": "krishnateja/wav2vec2-telugu_150",
   "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2ProcessorWithLM",
   "replace_word_delimiter_char": " ",
   "special_tokens_map_file": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",