File size: 897 Bytes
a263f35
ea52814
a263f35
 
 
 
ea52814
 
a263f35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr
from transformers import pipeline, Wav2Vec2ProcessorWithLM
from librosa import load, resample


# Checkpoint identifier shared by the processor and the pipeline below.
asr_model = 'facebook/wav2vec2-base-960h'

# NOTE(review): Wav2Vec2ProcessorWithLM bundles a tokenizer, a feature
# extractor and a decoder; presumably the checkpoint must ship decoder files
# for this to load -- confirm that this checkpoint does.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)

# Speech-recognition pipeline assembled from the processor's components.
asr = pipeline(
	'automatic-speech-recognition',
	model=asr_model,
	tokenizer=processor.tokenizer,
	feature_extractor=processor.feature_extractor,
	decoder=processor.decoder,
)

def transcribe(filepath):
	"""Transcribe an audio file to text with the module-level ASR pipeline.

	Args:
		filepath: Path of the audio file to transcribe. A falsy value
			(``None`` or ``''``) is treated as "nothing to transcribe";
			presumably this is what the UI passes when no recording was
			made -- verify against the Gradio version in use.

	Returns:
		The ``'text'`` entry of the pipeline result, or ``''`` when no file
		path was supplied.
	"""
	# Nothing to decode: answer with an empty transcription instead of
	# letting the audio loader fail on a missing path.
	if not filepath:
		return ''

	# Have librosa decode straight to 16 kHz. A bare load() resamples to
	# librosa's own default rate first, which forced a second resampling
	# pass, and the positional resample(y, orig, target) call is rejected by
	# newer librosa releases where those arguments are keyword-only.
	target_rate = 16000
	speech, _ = load(filepath, sr=target_rate)

	return asr(speech)['text']

# Input widget: records from the microphone and passes the recording to the
# callback as a file path.
mic = gr.inputs.Audio(
	label='Speech input',
	type='filepath',
	source='microphone',
	optional=False,
)

# Output widget that displays the text returned by the callback.
transcript = gr.outputs.Textbox(label='Transcription', type='auto')

# Wire the recorder, the transcription callback and the text box together,
# then start the app as soon as this module is executed.
iface = gr.Interface(
	fn=transcribe,
	inputs=[mic],
	outputs=[transcript],
	description='Testing transcription',
	theme='huggingface',
)
iface.launch()