call-sentiment / app.py
ktangri
Add LM-boosted decoding
ea52814
raw
history blame
897 Bytes
import gradio as gr
from transformers import pipeline, Wav2Vec2ProcessorWithLM
from librosa import load, resample
# Checkpoint name shared by the processor and the recognition pipeline.
asr_model = 'facebook/wav2vec2-base-960h'

# The LM-aware processor supplies the tokenizer, feature extractor and decoder
# that are handed to the pipeline below.
# NOTE(review): confirm this checkpoint actually ships the language-model
# decoder files that Wav2Vec2ProcessorWithLM loads.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)

# Speech-recognition pipeline assembled from the processor's components.
asr = pipeline(
    'automatic-speech-recognition',
    model=asr_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    decoder=processor.decoder,
)
def transcribe(filepath):
    """Transcribe an audio file to text with the module-level ASR pipeline.

    Args:
        filepath: Path to the audio file to transcribe.

    Returns:
        The value stored under the ``'text'`` key of the pipeline's output.
    """
    # load() resamples to 22,050 Hz unless ``sr`` is given, so a separate
    # "resample to 16 kHz" step would always run and resample the audio a
    # second time. Asking for 16 kHz directly decodes straight to the target
    # rate and avoids the positional resample() call, whose rate arguments
    # are keyword-only in newer librosa releases.
    speech, _ = load(filepath, sr=16000)
    return asr(speech)['text']
# Microphone recording, handed to the callback as a file path.
mic = gr.inputs.Audio(
    source='microphone',
    type='filepath',
    label='Speech input',
    optional=False,
)

# Text box that shows the callback's return value.
transcript = gr.outputs.Textbox(type='auto', label='Transcription')

# Wire the microphone input through transcribe() into the text output.
iface = gr.Interface(
    fn=transcribe,
    inputs=[mic],
    outputs=[transcript],
    description='Testing transcription',
    theme='huggingface',
)

iface.launch()