import time

import gradio as gr
import torch
from transformers import pipeline

# Model identifier handed to the transformers pipeline below.
username = "ardneebwar"
model_id = f"{username}/wav2vec2-animal-sounds-finetuned-hubert-finetuned-animals"

# Run on the first CUDA device when torch reports one, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Built once at module load so every classify_audio call reuses the same
# pipeline object.
pipe = pipeline("audio-classification", model=model_id, device=device)


def classify_audio(filepath):
    """Classify an audio clip and report how long inference took.

    Args:
        filepath: Path to the audio file to classify, as delivered by the
            ``gr.Audio(type="filepath")`` input. A falsy value (``None`` or
            an empty string) means no clip was supplied.

    Returns:
        A ``(outputs, prediction_time)`` tuple. ``outputs`` maps each label
        returned by the pipeline to its score; ``prediction_time`` is the
        time spent inside the pipeline call, in seconds.

    Raises:
        gr.Error: If no audio clip was supplied.
    """
    if not filepath:
        # Fail with a readable message in the UI instead of letting the
        # pipeline raise an opaque error on a missing input.
        raise gr.Error("Please upload an audio file before submitting.")

    # perf_counter() is a monotonic, high-resolution clock, so the interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    preds = pipe(filepath)
    prediction_time = time.perf_counter() - start_time

    outputs = {p["label"]: p["score"] for p in preds}
    return outputs, prediction_time


# Heading and blurb passed to the Gradio interface.
title = "🎵 Animal Sound Classifier"

# The " | " separators inside the text are intentional and part of the blurb.
description = """
Animal Sound Classifier model (Fine-tuned "facebook/hubert-base-ls960") | Dataset: ESC-50 from Github (only the animal sounds) | Better to use audios 5 seconds long.
"""

# Example clips, written as paths relative to the working directory.
filenames = [f"./{name}" for name in ("cat.wav", "dog.mp3", "rooster.mp3")]

# One audio input feeds classify_audio; its two return values populate the
# label widget and the elapsed-time widget, in that order.
demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath", label="Upload your audio file"),
    outputs=[
        gr.Label(label="Predicted Animal Sound"),
        gr.Number(label="Prediction time (s)"),
    ],
    title=title,
    description=description,
    theme="huggingface",
    # Reuse the module-level example list instead of repeating the file names.
    examples=filenames,
    live=False,
)

demo.launch()