"""Gradio demo: classify a spoken command in an uploaded audio file."""

from typing import Optional

import gradio as gr
from transformers import pipeline

# Hugging Face Hub id of the fine-tuned checkpoint used for classification.
MODEL_ID = "juliensimon/wav2vec2-conformer-rel-pos-large-finetuned-speech-commands"

# Built once at start-up so each request reuses the same pipeline object.
pipe = pipeline("audio-classification", model=MODEL_ID)


def predict(audio_path: Optional[str]) -> str:
    """Return the label of the first prediction for the given audio file.

    Args:
        audio_path: Path of the audio file to classify. May be ``None`` or
            empty when the request was submitted without an uploaded file.

    Returns:
        The ``"label"`` field of the first entry returned by the pipeline,
        or a short message when no audio was provided.
    """
    # Guard against an empty input: forwarding None to the pipeline would
    # raise instead of producing a readable result.
    if not audio_path:
        return "No audio provided."

    # NOTE(review): the pipeline is expected to list predictions best-first,
    # so index 0 should be the top-scoring label -- confirm.
    predictions = pipe(audio_path)
    return predictions[0]["label"]


# NOTE(review): `source="upload"` is the Gradio 3.x spelling; Gradio 4
# renamed it to `sources=["upload"]`. Confirm against the pinned version.
demo = gr.Interface(
    title='Audio commands recognition',
    fn=predict,
    inputs=gr.Audio(source="upload", type='filepath'),
    outputs='text',
)

demo.launch()