Spaces:

Kr08
/

ASR_gradio

Build error

App Files Community

Kr08 committed on Aug 28, 2024

Commit

0fd738e

verified ·

1 Parent(s): 3e497df

Update audio_processing.py

Browse files

Files changed (1) hide show

audio_processing.py +27 -6

audio_processing.py CHANGED Viewed

@@ -4,6 +4,7 @@ import whisper
 import subprocess
 import numpy as np
 import gradio as gr
 import torchaudio as ta
 from model_utils import get_processor, get_model, get_whisper_model_small, get_device
@@ -19,17 +20,37 @@ from config import SAMPLING_RATE, CHUNK_LENGTH_S
 @spaces.GPU
 def load_and_resample_audio(file):
-    waveform, sample_rate = torchaudio.load(file)
     if sample_rate != SAMPLING_RATE:
-        waveform = F.resample(waveform, sample_rate, SAMPLING_RATE)
     # Ensure the audio is in the correct shape (mono)
     if waveform.dim() > 1 and waveform.shape[0] > 1:
         waveform = waveform.mean(dim=0, keepdim=True)
     return waveform, SAMPLING_RATE
 @spaces.GPU
 def detect_language(audio):

 import subprocess
 import numpy as np
 import gradio as gr
+import soundfile as sf
 import torchaudio as ta
 from model_utils import get_processor, get_model, get_whisper_model_small, get_device
 @spaces.GPU
 def load_and_resample_audio(file):
+    try:
+        # First attempt: Use torchaudio.load()
+        waveform, sample_rate = torchaudio.load(file)
+    except Exception as e:
+        print(f"torchaudio.load() failed: {e}")
+        try:
+            # Second attempt: Use soundfile
+            waveform, sample_rate = sf.read(file)
+            waveform = torch.from_numpy(waveform.T).float()
+            if waveform.dim() == 1:
+                waveform = waveform.unsqueeze(0)
+        except Exception as e:
+            print(f"soundfile.read() failed: {e}")
+            raise ValueError(f"Failed to load audio file: {file}")
+    print(f"Original audio shape: {waveform.shape}, Sample rate: {sample_rate}")
     if sample_rate != SAMPLING_RATE:
+        try:
+            waveform = F.resample(waveform, sample_rate, SAMPLING_RATE)
+        except Exception as e:
+            print(f"Resampling failed: {e}")
+            raise ValueError(f"Failed to resample audio from {sample_rate} to {SAMPLING_RATE}")
     # Ensure the audio is in the correct shape (mono)
     if waveform.dim() > 1 and waveform.shape[0] > 1:
         waveform = waveform.mean(dim=0, keepdim=True)
+    print(f"Processed audio shape: {waveform.shape}, New sample rate: {SAMPLING_RATE}")
     return waveform, SAMPLING_RATE
 @spaces.GPU
 def detect_language(audio):