Macedonian-ASR
/

wav2vec2-aed-macedonian-asr

Automatic Speech Recognition

Model card · Files and versions · Community

Porjaz committed on Aug 19, 2024

Commit

641eeeb

·

verified ·

1 Parent(s): b1c4d32

Update custom_interface.py

Files changed (1) hide show

custom_interface.py +6 -3

custom_interface.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import torch
 from speechbrain.inference.interfaces import Pretrained
 class ASR(Pretrained):
@@ -20,13 +21,15 @@ class ASR(Pretrained):
         # Output layer for seq2seq log-probabilities
         predictions = self.hparams.test_search(encoded_outputs, self.wav_lens)[0]
         predicted_words = [self.hparams.tokenizer.decode_ids(prediction).split(" ") for prediction in predictions]
-        print(predicted_words)
         return predicted_words
     def classify_file(self, path):
-        waveform = self.load_audio(path)
         # Fake a batch:
         batch = waveform.unsqueeze(0)
         rel_length = torch.tensor([1.0])
@@ -35,4 +38,4 @@ class ASR(Pretrained):
         return outputs
     # def forward(self, wavs, wav_lens=None):
-    #     return self.encode_batch(wavs=wavs, wav_lens=wav_lens)

 import torch
 from speechbrain.inference.interfaces import Pretrained
+import librosa
 class ASR(Pretrained):
         # Output layer for seq2seq log-probabilities
         predictions = self.hparams.test_search(encoded_outputs, self.wav_lens)[0]
         predicted_words = [self.hparams.tokenizer.decode_ids(prediction).split(" ") for prediction in predictions]
         return predicted_words
     def classify_file(self, path):
+        # waveform = self.load_audio(path)
+        waveform, sr = librosa.load(path, sr=16000)
+        waveform = torch.tensor(waveform).unsqueeze(0)
         # Fake a batch:
         batch = waveform.unsqueeze(0)
         rel_length = torch.tensor([1.0])
         return outputs
     # def forward(self, wavs, wav_lens=None):
+    #     return self.encode_batch(wavs=wavs, wav_lens=wav_lens)