unijoh committed on
Commit
ec21c18
1 Parent(s): 945f569

Update asr.py

Browse files
Files changed (1) hide show
  1. asr.py +19 -14
asr.py CHANGED
@@ -1,5 +1,6 @@
1
  import librosa
2
- from transformers import pipeline
 
3
  import logging
4
 
5
  # Set up logging
@@ -7,14 +8,13 @@ logging.basicConfig(level=logging.DEBUG)
7
 
8
  ASR_SAMPLING_RATE = 16_000
9
  MODEL_ID = "facebook/mms-1b-all"
10
- LANGUAGE_CODE = "fao" # Faroese language code
11
 
12
  try:
13
- # Create the pipeline with the appropriate model
14
- pipe = pipeline("automatic-speech-recognition", model=MODEL_ID, tokenizer=MODEL_ID)
15
- logging.info("ASR pipeline loaded successfully.")
16
  except Exception as e:
17
- logging.error(f"Error loading ASR pipeline: {e}")
18
 
19
  def transcribe(audio):
20
  try:
@@ -27,8 +27,6 @@ def transcribe(audio):
27
  # Try loading the audio file with librosa
28
  try:
29
  audio_samples, _ = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)
30
- if len(audio_samples) == 0:
31
- raise ValueError("Audio samples are empty")
32
  except FileNotFoundError:
33
  logging.error("Audio file not found")
34
  return "ERROR: Audio file not found"
@@ -36,12 +34,19 @@ def transcribe(audio):
36
  logging.error(f"Error loading audio file with librosa: {e}")
37
  return f"ERROR: Unable to load audio file - {e}"
38
 
39
- # Process the audio with the pipeline
40
- try:
41
- transcription = pipe(audio_samples, chunk_length_s=10, stride_length_s=5)["text"]
42
- except Exception as e:
43
- logging.error(f"Error during transcription with pipeline: {e}")
44
- return f"ERROR: Transcription failed - {e}"
 
 
 
 
 
 
 
45
 
46
  logging.info("Transcription completed successfully.")
47
  return transcription
 
1
  import librosa
2
+ from transformers import AutoProcessor, Wav2Vec2ForCTC
3
+ import torch
4
  import logging
5
 
6
  # Set up logging
 
8
 
9
  ASR_SAMPLING_RATE = 16_000
10
  MODEL_ID = "facebook/mms-1b-all"
 
11
 
12
  try:
13
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
14
+ model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
15
+ logging.info("ASR model and processor loaded successfully.")
16
  except Exception as e:
17
+ logging.error(f"Error loading ASR model or processor: {e}")
18
 
19
  def transcribe(audio):
20
  try:
 
27
  # Try loading the audio file with librosa
28
  try:
29
  audio_samples, _ = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)
 
 
30
  except FileNotFoundError:
31
  logging.error("Audio file not found")
32
  return "ERROR: Audio file not found"
 
34
  logging.error(f"Error loading audio file with librosa: {e}")
35
  return f"ERROR: Unable to load audio file - {e}"
36
 
37
+ # Set the language for the processor to Faroese
38
+ lang_code = "fao"
39
+ processor.tokenizer.set_target_lang(lang_code)
40
+ model.load_adapter(lang_code)
41
+
42
+ # Process the audio with the processor
43
+ inputs = processor(audio_samples, sampling_rate=ASR_SAMPLING_RATE, return_tensors="pt")
44
+
45
+ with torch.no_grad():
46
+ outputs = model(**inputs).logits
47
+
48
+ ids = torch.argmax(outputs, dim=-1)[0]
49
+ transcription = processor.decode(ids)
50
 
51
  logging.info("Transcription completed successfully.")
52
  return transcription