indonesian-nlp
/

wav2vec2-luganda

Automatic Speech Recognition

Inference Endpoints

Model card Files and versions Community

cahya committed on Jan 19, 2022

Commit

67d044b

•

1 Parent(s): e69a778

Update README.md

Files changed (1) hide show

README.md +9 -3

README.md CHANGED Viewed

@@ -10,7 +10,7 @@ tags:
 - speech
 license: apache-2.0
 model-index:
-- name: Wav2Vec2 Luganda
   results:
   - task:
       name: Speech Recognition
@@ -54,7 +54,10 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-    speech_array, sampling_rate = torchaudio.load(batch["path"])
     batch["speech"] = resampler(speech_array).squeeze().numpy()
     return batch
@@ -98,7 +101,10 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
     batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
-    speech_array, sampling_rate = torchaudio.load(batch["path"])
     batch["speech"] = resampler(speech_array).squeeze().numpy()
     return batch

 - speech
 license: apache-2.0
 model-index:
+- name: Wav2Vec2 Luganda by Indonesian-NLP
   results:
   - task:
       name: Speech Recognition
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
+    if "audio" in batch:
+        speech_array = torch.tensor(batch["audio"]["array"])
+    else:
+        speech_array, sampling_rate = torchaudio.load(batch["path"])
     batch["speech"] = resampler(speech_array).squeeze().numpy()
     return batch
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
     batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
+    if "audio" in batch:
+        speech_array = torch.tensor(batch["audio"]["array"])
+    else:
+        speech_array, sampling_rate = torchaudio.load(batch["path"])
     batch["speech"] = resampler(speech_array).squeeze().numpy()
     return batch