Spaces:

unijoh
/

metaambod

Running

App Files Files Community

unijoh committed on Jun 15

Commit

ec21c18

•

1 Parent(s): 945f569

Update asr.py

Browse files

Files changed (1) hide show

asr.py +19 -14

asr.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import librosa
-from transformers import pipeline
 import logging
 # Set up logging
@@ -7,14 +8,13 @@ logging.basicConfig(level=logging.DEBUG)
 ASR_SAMPLING_RATE = 16_000
 MODEL_ID = "facebook/mms-1b-all"
-LANGUAGE_CODE = "fao"  # Faroese language code
 try:
-    # Create the pipeline with the appropriate model
-    pipe = pipeline("automatic-speech-recognition", model=MODEL_ID, tokenizer=MODEL_ID)
-    logging.info("ASR pipeline loaded successfully.")
 except Exception as e:
-    logging.error(f"Error loading ASR pipeline: {e}")
 def transcribe(audio):
     try:
@@ -27,8 +27,6 @@ def transcribe(audio):
         # Try loading the audio file with librosa
         try:
             audio_samples, _ = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)
-            if len(audio_samples) == 0:
-                raise ValueError("Audio samples are empty")
         except FileNotFoundError:
             logging.error("Audio file not found")
             return "ERROR: Audio file not found"
@@ -36,12 +34,19 @@ def transcribe(audio):
             logging.error(f"Error loading audio file with librosa: {e}")
             return f"ERROR: Unable to load audio file - {e}"
-        # Process the audio with the pipeline
-        try:
-            transcription = pipe(audio_samples, chunk_length_s=10, stride_length_s=5)["text"]
-        except Exception as e:
-            logging.error(f"Error during transcription with pipeline: {e}")
-            return f"ERROR: Transcription failed - {e}"
         logging.info("Transcription completed successfully.")
         return transcription

 import librosa
+from transformers import AutoProcessor, Wav2Vec2ForCTC
+import torch
 import logging
 # Set up logging
 ASR_SAMPLING_RATE = 16_000
 MODEL_ID = "facebook/mms-1b-all"
 try:
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
+    logging.info("ASR model and processor loaded successfully.")
 except Exception as e:
+    logging.error(f"Error loading ASR model or processor: {e}")
 def transcribe(audio):
     try:
         # Try loading the audio file with librosa
         try:
             audio_samples, _ = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)
         except FileNotFoundError:
             logging.error("Audio file not found")
             return "ERROR: Audio file not found"
             logging.error(f"Error loading audio file with librosa: {e}")
             return f"ERROR: Unable to load audio file - {e}"
+        # Set the language for the processor to Faroese
+        lang_code = "fao"
+        processor.tokenizer.set_target_lang(lang_code)
+        model.load_adapter(lang_code)
+        # Process the audio with the processor
+        inputs = processor(audio_samples, sampling_rate=ASR_SAMPLING_RATE, return_tensors="pt")
+        with torch.no_grad():
+            outputs = model(**inputs).logits
+        ids = torch.argmax(outputs, dim=-1)[0]
+        transcription = processor.decode(ids)
         logging.info("Transcription completed successfully.")
         return transcription