# akki2825's picture
# Update run.py
# 3d8de62
# raw
# history blame
# 2.28 kB
import os
import shlex
import subprocess
import sys
import urllib.request
import wave
from shlex import quote

import gradio as gr
import numpy as np
from deepspeech import Model
# File names of the DeepSpeech 0.9.3 acoustic model and external scorer; they
# are used both as the remote asset name under `url` and as the local path.
model_file_path = "deepspeech-0.9.3-models.pbmm"
lm_file_path = "deepspeech-0.9.3-models.scorer"
url = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/"


def _ensure_downloaded(file_name):
    """Fetch ``url + file_name`` into the working directory if it is missing.

    The download goes to a temporary ``.part`` file that is renamed only once
    the transfer has finished, so an interrupted download is never mistaken
    for a complete model file on the next start.
    """
    if os.path.exists(file_name):
        return
    partial_path = file_name + ".part"
    urllib.request.urlretrieve(url + file_name, filename=partial_path)
    os.replace(partial_path, file_name)


_ensure_downloaded(model_file_path)
_ensure_downloaded(lm_file_path)

# Decoder settings handed to the model below.
beam_width = 100
lm_alpha = 0.93
lm_beta = 1.18

# Shared model instance used by transcribe(); the external scorer is enabled
# before its alpha/beta weights are set.
model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
def convert_samplerate(audio_path, desired_sample_rate):
    """Convert an audio file to raw 16-bit mono PCM at the given rate via SoX.

    Args:
        audio_path: Path of the audio file passed to the ``sox`` executable.
        desired_sample_rate: Target sample rate in Hz.

    Returns:
        A ``(desired_sample_rate, samples)`` tuple, where ``samples`` is a
        ``numpy.int16`` array built from the bytes SoX wrote to stdout.

    Raises:
        RuntimeError: If ``sox`` exits with a non-zero status.
        OSError: If the ``sox`` process cannot be started.
    """
    # Pass an argument list directly instead of building a shell-style string
    # and splitting it again; the path reaches SoX verbatim either way.
    sox_cmd = [
        'sox', audio_path,
        '--type', 'raw',
        '--bits', '16',
        '--channels', '1',
        '--rate', str(desired_sample_rate),
        '--encoding', 'signed-integer',
        '--endian', 'little',
        '--compression', '0.0',
        '--no-dither',
        '-',  # write the converted audio to stdout
    ]
    try:
        output = subprocess.check_output(sox_cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        raise RuntimeError('SoX returned non-zero status: {}'.format(e.stderr)) from e
    except OSError as e:
        raise OSError(e.errno, 'SoX not found, use {}hz files or install it: {}'.format(desired_sample_rate, e.strerror)) from e
    return desired_sample_rate, np.frombuffer(output, np.int16)
def transcribe(audio_file):
    """Run speech-to-text on a WAV file and return the transcript.

    Args:
        audio_file: Path to a WAV file. ``None`` or an empty path (nothing
            was uploaded) yields an empty transcript.

    Returns:
        The text returned by ``model.stt`` for the audio, or ``""`` when no
        file was given.

    Raises:
        RuntimeError, OSError: Propagated from ``convert_samplerate`` when the
            file has to be converted and SoX fails or is unavailable.
    """
    if not audio_file:
        return ""

    desired_sample_rate = model.sampleRate()

    # The context manager closes the file even if reading the header or the
    # frames raises.
    with wave.open(audio_file, 'rb') as fin:
        fs_orig = fin.getframerate()
        # The frames are interpreted as a flat int16 stream below, which is
        # only valid for 16-bit mono data; anything else goes through SoX,
        # which is invoked with --bits 16 --channels 1.
        needs_conversion = (
            fs_orig != desired_sample_rate
            or fin.getnchannels() != 1
            or fin.getsampwidth() != 2
        )
        audio = None
        if not needs_conversion:
            audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    if needs_conversion:
        if fs_orig != desired_sample_rate:
            print('Warning: original sample rate ({}) is different than {}hz. Resampling might produce erratic speech recognition.'.format(fs_orig, desired_sample_rate), file=sys.stderr)
        _, audio = convert_samplerate(audio_file, desired_sample_rate)

    return model.stt(audio)
# Gradio UI: a single uploaded audio file (delivered to transcribe() as a
# file path) in, a transcript text box out.
demo = gr.Interface(
    fn=transcribe,
    # Disabled alternative input kept from the original:
    # [gr.Audio(source="microphone", streaming=True), "state"],
    inputs=gr.Audio(label="Upload Audio File", source="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcript"),
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()