Spaces:
Running
Running
Update asr.py
Browse files
asr.py
CHANGED
@@ -7,6 +7,7 @@ import numpy as np
|
|
7 |
processor = AutoProcessor.from_pretrained("ixxan/whisper-small-common-voice-ug")
|
8 |
model = AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-common-voice-ug")
|
9 |
|
|
|
10 |
|
11 |
def transcribe(audio_data) -> str:
|
12 |
"""
|
@@ -32,15 +33,15 @@ def transcribe(audio_data) -> str:
|
|
32 |
|
33 |
else:
|
34 |
return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
|
35 |
-
|
36 |
|
37 |
# Resample if needed
|
38 |
-
if sampling_rate !=
|
39 |
-
resampler = torchaudio.transforms.Resample(sampling_rate,
|
40 |
audio_input = resampler(audio_input)
|
41 |
|
42 |
# Preprocess the audio input
|
43 |
-
inputs = processor(audio_input.squeeze(), sampling_rate=
|
44 |
|
45 |
# Move model to GPU if available
|
46 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
7 |
processor = AutoProcessor.from_pretrained("ixxan/whisper-small-common-voice-ug")
|
8 |
model = AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-common-voice-ug")
|
9 |
|
10 |
+
target_sr = processor.feature_extractor.sampling_rate
|
11 |
|
12 |
def transcribe(audio_data) -> str:
|
13 |
"""
|
|
|
33 |
|
34 |
else:
|
35 |
return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
|
36 |
+
|
37 |
|
38 |
# Resample if needed
|
39 |
+
if sampling_rate != target_sr:
|
40 |
+
resampler = torchaudio.transforms.Resample(sampling_rate, target_sr)
|
41 |
audio_input = resampler(audio_input)
|
42 |
|
43 |
# Preprocess the audio input
|
44 |
+
inputs = processor(audio_input.squeeze(), sampling_rate=target_sr, return_tensors="pt")
|
45 |
|
46 |
# Move model to GPU if available
|
47 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|