Kr08 committed on
Commit
0fd738e
·
verified ·
1 Parent(s): 3e497df

Update audio_processing.py

Browse files
Files changed (1) hide show
  1. audio_processing.py +27 -6
audio_processing.py CHANGED
@@ -4,6 +4,7 @@ import whisper
4
  import subprocess
5
  import numpy as np
6
  import gradio as gr
 
7
  import torchaudio as ta
8
 
9
  from model_utils import get_processor, get_model, get_whisper_model_small, get_device
@@ -19,17 +20,37 @@ from config import SAMPLING_RATE, CHUNK_LENGTH_S
19
 
20
  @spaces.GPU
21
  def load_and_resample_audio(file):
22
- waveform, sample_rate = torchaudio.load(file)
23
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  if sample_rate != SAMPLING_RATE:
25
- waveform = F.resample(waveform, sample_rate, SAMPLING_RATE)
26
-
 
 
 
 
27
  # Ensure the audio is in the correct shape (mono)
28
  if waveform.dim() > 1 and waveform.shape[0] > 1:
29
  waveform = waveform.mean(dim=0, keepdim=True)
30
-
 
 
31
  return waveform, SAMPLING_RATE
32
-
33
 
34
  @spaces.GPU
35
  def detect_language(audio):
 
4
  import subprocess
5
  import numpy as np
6
  import gradio as gr
7
+ import soundfile as sf
8
  import torchaudio as ta
9
 
10
  from model_utils import get_processor, get_model, get_whisper_model_small, get_device
 
20
 
21
  @spaces.GPU
22
  def load_and_resample_audio(file):
23
+ try:
24
+ # First attempt: Use torchaudio.load()
25
+ waveform, sample_rate = torchaudio.load(file)
26
+ except Exception as e:
27
+ print(f"torchaudio.load() failed: {e}")
28
+ try:
29
+ # Second attempt: Use soundfile
30
+ waveform, sample_rate = sf.read(file)
31
+ waveform = torch.from_numpy(waveform.T).float()
32
+ if waveform.dim() == 1:
33
+ waveform = waveform.unsqueeze(0)
34
+ except Exception as e:
35
+ print(f"soundfile.read() failed: {e}")
36
+ raise ValueError(f"Failed to load audio file: {file}")
37
+
38
+ print(f"Original audio shape: {waveform.shape}, Sample rate: {sample_rate}")
39
+
40
  if sample_rate != SAMPLING_RATE:
41
+ try:
42
+ waveform = F.resample(waveform, sample_rate, SAMPLING_RATE)
43
+ except Exception as e:
44
+ print(f"Resampling failed: {e}")
45
+ raise ValueError(f"Failed to resample audio from {sample_rate} to {SAMPLING_RATE}")
46
+
47
  # Ensure the audio is in the correct shape (mono)
48
  if waveform.dim() > 1 and waveform.shape[0] > 1:
49
  waveform = waveform.mean(dim=0, keepdim=True)
50
+
51
+ print(f"Processed audio shape: {waveform.shape}, New sample rate: {SAMPLING_RATE}")
52
+
53
  return waveform, SAMPLING_RATE
 
54
 
55
  @spaces.GPU
56
  def detect_language(audio):