Sri3010
/

wav2vec2-large-xls-r-300m-TAMIL-colab

Automatic Speech Recognition

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

Sri3010 committed on Oct 26, 2024

Commit

356578f

·

verified ·

1 Parent(s): 822438a

Upload tokenizer

Files changed (3) hide show

added_tokens.json +2 -2
tokenizer_config.json +4 -4
vocab.json +42 -46

added_tokens.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "</s>": 100,
-  "<s>": 99
 }

 {
+  "</s>": 96,
+  "<s>": 95
 }

tokenizer_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "added_tokens_decoder": {
-    "97": {
       "content": "[UNK]",
       "lstrip": true,
       "normalized": false,
@@ -8,7 +8,7 @@
       "single_word": false,
       "special": false
     },
-    "98": {
       "content": "[PAD]",
       "lstrip": true,
       "normalized": false,
@@ -16,7 +16,7 @@
       "single_word": false,
       "special": false
     },
-    "99": {
       "content": "<s>",
       "lstrip": false,
       "normalized": false,
@@ -24,7 +24,7 @@
       "single_word": false,
       "special": true
     },
-    "100": {
       "content": "</s>",
       "lstrip": false,
       "normalized": false,

 {
   "added_tokens_decoder": {
+    "93": {
       "content": "[UNK]",
       "lstrip": true,
       "normalized": false,
       "single_word": false,
       "special": false
     },
+    "94": {
       "content": "[PAD]",
       "lstrip": true,
       "normalized": false,
       "single_word": false,
       "special": false
     },
+    "95": {
       "content": "<s>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "96": {
       "content": "</s>",
       "lstrip": false,
       "normalized": false,

vocab.json CHANGED Viewed

@@ -2,8 +2,8 @@
   "&": 1,
   "(": 2,
   ")": 3,
-  "[PAD]": 98,
-  "[UNK]": 97,
   "\\": 4,
   "_": 5,
   "`": 6,
@@ -39,8 +39,8 @@
   "¾": 35,
   "á": 36,
   "é": 37,
-  "š": 38,
-  "ஃ": 39,
   "அ": 40,
   "ஆ": 41,
   "இ": 42,
@@ -56,46 +56,42 @@
   "க": 52,
   "ங": 53,
   "ச": 54,
-  "ஜ": 55,
-  "ஞ": 56,
-  "ட": 57,
-  "ண": 58,
-  "த": 59,
-  "ந": 60,
-  "ன": 61,
-  "ப": 62,
-  "ம": 63,
-  "ய": 64,
-  "ர": 65,
-  "ற": 66,
-  "ல": 67,
-  "ள": 68,
-  "ழ": 69,
-  "வ": 70,
-  "ஷ": 71,
-  "ஸ": 72,
-  "ஹ": 73,
-  "ா": 74,
-  "ி": 75,
-  "ீ": 76,
-  "ு": 77,
-  "ூ": 78,
-  "ெ": 79,
-  "ே": 80,
-  "ை": 81,
-  "ொ": 82,
-  "ோ": 83,
-  "ௌ": 84,
-  "்": 85,
-  "ௗ": 86,
-  "ഥ": 87,
-  "–": 88,
-  "—": 89,
-  "’": 90,
-  "‚": 91,
-  "•": 92,
-  "…": 93,
-  "″": 94,
-  "●": 95,
-  "◯": 96
 }

   "&": 1,
   "(": 2,
   ")": 3,
+  "[PAD]": 94,
+  "[UNK]": 93,
   "\\": 4,
   "_": 5,
   "`": 6,
   "¾": 35,
   "á": 36,
   "é": 37,
+  "ô": 38,
+  "š": 39,
   "அ": 40,
   "ஆ": 41,
   "இ": 42,
   "க": 52,
   "ங": 53,
   "ச": 54,
+  "ஞ": 55,
+  "ட": 56,
+  "ண": 57,
+  "த": 58,
+  "ந": 59,
+  "ன": 60,
+  "ப": 61,
+  "ம": 62,
+  "ய": 63,
+  "ர": 64,
+  "ற": 65,
+  "ல": 66,
+  "ள": 67,
+  "ழ": 68,
+  "வ": 69,
+  "ஷ": 70,
+  "ா": 71,
+  "ி": 72,
+  "ீ": 73,
+  "ு": 74,
+  "ூ": 75,
+  "ெ": 76,
+  "ே": 77,
+  "ை": 78,
+  "ொ": 79,
+  "ோ": 80,
+  "்": 81,
+  "ௗ": 82,
+  "ഥ": 83,
+  "–": 84,
+  "—": 85,
+  "’": 86,
+  "‚": 87,
+  "•": 88,
+  "…": 89,
+  "″": 90,
+  "●": 91,
+  "◯": 92
 }