Spaces:
Runtime error
Runtime error
bofenghuang
committed on
Commit
•
e927cf5
1
Parent(s):
a356f8e
fix waveform type
Browse files
- requirements.txt +1 -2
- run_demo.py +12 -7
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
transformers
|
2 |
-
|
3 |
-
torchaudio
|
4 |
pyctcdecode
|
5 |
pypi-kenlm
|
|
|
1 |
transformers
|
2 |
+
librosa
|
|
|
3 |
pyctcdecode
|
4 |
pypi-kenlm
|
run_demo.py
CHANGED
@@ -2,7 +2,8 @@ import logging
|
|
2 |
import warnings
|
3 |
|
4 |
import gradio as gr
|
5 |
-
import torchaudio
|
|
|
6 |
from transformers import pipeline
|
7 |
from transformers.utils.logging import disable_progress_bar
|
8 |
|
@@ -24,13 +25,17 @@ logger.info("ASR pipeline has been initialized")
|
|
24 |
|
25 |
|
26 |
def process_audio_file(audio_file):
|
27 |
-
waveform, sample_rate = torchaudio.load(audio_file)
|
28 |
-
waveform = waveform.squeeze(axis=0) # mono
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
30 |
# resample
|
31 |
if sample_rate != SAMPLE_RATE:
|
32 |
-
resampler = torchaudio.transforms.Resample(sample_rate, SAMPLE_RATE)
|
33 |
-
waveform = resampler(waveform)
|
34 |
|
35 |
return waveform
|
36 |
|
@@ -52,7 +57,7 @@ def transcribe(microphone_audio_file, uploaded_audio_file):
|
|
52 |
|
53 |
audio_data = process_audio_file(audio_file)
|
54 |
|
55 |
-
# text = pipe(
|
56 |
text = pipe(audio_data)["text"]
|
57 |
logger.info(f"Transcription for {audio_file}: {text}")
|
58 |
|
|
|
2 |
import warnings
|
3 |
|
4 |
import gradio as gr
|
5 |
+
import librosa
|
6 |
+
# import torchaudio
|
7 |
from transformers import pipeline
|
8 |
from transformers.utils.logging import disable_progress_bar
|
9 |
|
|
|
25 |
|
26 |
|
27 |
def process_audio_file(audio_file):
|
28 |
+
# waveform, sample_rate = torchaudio.load(audio_file)
|
29 |
+
# waveform = waveform.squeeze(axis=0) # mono
|
30 |
+
# # resample
|
31 |
+
# if sample_rate != SAMPLE_RATE:
|
32 |
+
# resampler = torchaudio.transforms.Resample(sample_rate, SAMPLE_RATE)
|
33 |
+
# waveform = resampler(waveform)
|
34 |
+
|
35 |
+
waveform, sample_rate = librosa.load(audio_file, mono=True)
|
36 |
# resample
|
37 |
if sample_rate != SAMPLE_RATE:
|
38 |
+
waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
|
|
|
39 |
|
40 |
return waveform
|
41 |
|
|
|
57 |
|
58 |
audio_data = process_audio_file(audio_file)
|
59 |
|
60 |
+
# text = pipe(audio_data, chunk_length_s=30, stride_length_s=5)["text"]
|
61 |
text = pipe(audio_data)["text"]
|
62 |
logger.info(f"Transcription for {audio_file}: {text}")
|
63 |
|