"""Gradio demo that transcribes microphone recordings to text.

The speech-recognition pipeline is created once at import time and reused
for every request handled by the web interface.
"""

from typing import Optional

import gradio as gr
from transformers import pipeline

# Loaded once at module import so individual requests do not pay the
# model-loading cost again.
model = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)

title = "AudioToText"
description = "Please record audio on spanish for transcript"


def transcript(audio: Optional[str]) -> str:
    """Return the transcription of the recorded audio.

    Args:
        audio: Path to the recorded audio file (the Audio input is
            configured with ``type="filepath"``). Expected to be ``None``
            when the form is submitted without a recording.

    Returns:
        The ``"text"`` field of the pipeline output, or an empty string
        when no audio was provided.
    """
    # Guard against an empty submission: forwarding a missing recording to
    # the pipeline would raise instead of producing a transcript.
    if not audio:
        return ""
    return model(audio)["text"]


# NOTE(review): ``source="microphone"`` is kept exactly as in the original;
# newer Gradio releases may expect ``sources=["microphone"]`` -- confirm
# against the installed version before changing it.
demo = gr.Interface(
    fn=transcript,
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs=["textbox"],
    title=title,
    description=description,
)

# Launch stays unconditional at module level to preserve the original
# script's behavior.
demo.launch()