Spaces:
Build error
Build error
Update audio_processing.py
Browse files- audio_processing.py +27 -6
audio_processing.py
CHANGED
@@ -4,6 +4,7 @@ import whisper
|
|
4 |
import subprocess
|
5 |
import numpy as np
|
6 |
import gradio as gr
|
|
|
7 |
import torchaudio as ta
|
8 |
|
9 |
from model_utils import get_processor, get_model, get_whisper_model_small, get_device
|
@@ -19,17 +20,37 @@ from config import SAMPLING_RATE, CHUNK_LENGTH_S
|
|
19 |
|
20 |
@spaces.GPU
|
21 |
def load_and_resample_audio(file):
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
if sample_rate != SAMPLING_RATE:
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
27 |
# Ensure the audio is in the correct shape (mono)
|
28 |
if waveform.dim() > 1 and waveform.shape[0] > 1:
|
29 |
waveform = waveform.mean(dim=0, keepdim=True)
|
30 |
-
|
|
|
|
|
31 |
return waveform, SAMPLING_RATE
|
32 |
-
|
33 |
|
34 |
@spaces.GPU
|
35 |
def detect_language(audio):
|
|
|
4 |
import subprocess
|
5 |
import numpy as np
|
6 |
import gradio as gr
|
7 |
+
import soundfile as sf
|
8 |
import torchaudio as ta
|
9 |
|
10 |
from model_utils import get_processor, get_model, get_whisper_model_small, get_device
|
|
|
20 |
|
21 |
@spaces.GPU
def load_and_resample_audio(file):
    """Load an audio file and return it as mono audio at ``SAMPLING_RATE``.

    Loading is attempted with torchaudio first. If that raises, soundfile is
    used as a fallback and its array is transposed and converted to a float
    tensor so that dimension 0 is the channel dimension.

    Args:
        file: Audio source, passed unchanged to ``ta.load`` and, if that
            fails, to ``sf.read``.

    Returns:
        A ``(waveform, SAMPLING_RATE)`` tuple. ``waveform`` has been resampled
        to ``SAMPLING_RATE`` when the source rate differed, and averaged over
        dimension 0 (kept as a size-1 dimension) when it had more than one
        channel.

    Raises:
        ValueError: If both loaders fail, or if resampling fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # First attempt: torchaudio. The module is imported as ``ta`` in this
        # file, so the call must go through that alias; referring to the bare
        # name ``torchaudio`` raises NameError and silently forces the fallback.
        waveform, sample_rate = ta.load(file)
    except Exception as load_err:
        print(f"torchaudio.load() failed: {load_err}")
        try:
            # Second attempt: soundfile. The result is transposed so that a
            # multi-channel array becomes channels-first before conversion.
            samples, sample_rate = sf.read(file)
            waveform = torch.from_numpy(samples.T).float()
            if waveform.dim() == 1:
                # Mono input comes back 1-D; add the leading channel axis.
                waveform = waveform.unsqueeze(0)
        except Exception as sf_err:
            print(f"soundfile.read() failed: {sf_err}")
            raise ValueError(f"Failed to load audio file: {file}") from sf_err

    print(f"Original audio shape: {waveform.shape}, Sample rate: {sample_rate}")

    if sample_rate != SAMPLING_RATE:
        try:
            # Use the functional API through the visible ``ta`` alias rather
            # than an ``F`` alias whose import is not guaranteed to exist.
            waveform = ta.functional.resample(waveform, sample_rate, SAMPLING_RATE)
        except Exception as resample_err:
            print(f"Resampling failed: {resample_err}")
            raise ValueError(
                f"Failed to resample audio from {sample_rate} to {SAMPLING_RATE}"
            ) from resample_err

    # Ensure the audio is in the correct shape (mono)
    if waveform.dim() > 1 and waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    print(f"Processed audio shape: {waveform.shape}, New sample rate: {SAMPLING_RATE}")

    return waveform, SAMPLING_RATE
|
|
|
54 |
|
55 |
@spaces.GPU
|
56 |
def detect_language(audio):
|