Spaces:

kurianbenoy
/

audioclassification

Running

App Files Files Community

audioclassification / app.py

kurianbenoy

Update app.py

17c370b over 2 years ago

raw

history blame

2.16 kB

	import gradio
	import torchaudio
	from fastai.vision.all import *
	from fastai.learner import load_learner
	from torchvision.utils import save_image
	from huggingface_hub import hf_hub_download


	# Fetch the exported learner file from the Hugging Face Hub and load it.
	model = load_learner(
	    hf_hub_download(
	        repo_id="kurianbenoy/music_genre_classification_baseline",
	        filename="model.pkl",
	    )
	)

	# Directory whose entries are offered as clickable examples in the UI.
	EXAMPLES_PATH = Path("./examples")
	# Class vocabulary of the loaded learner; used as the keys of the prediction dict.
	labels = model.dls.vocab

	# Keyword arguments forwarded verbatim to ``gradio.Interface``.
	interface_options = dict(
	    title="Music Genre Classification",
	    description="A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
	    examples=[
	        f"{EXAMPLES_PATH}/{entry.name}" for entry in EXAMPLES_PATH.iterdir()
	    ],
	    interpretation="default",
	    layout="horizontal",
	    theme="default",
	)

	# FFT size (also used as the window length) and hop length that
	# ``create_spectrogram`` passes to the mel-spectrogram transform.
	N_FFT = 2048
	HOP_LEN = 1024


	def create_spectrogram(filename):
	    """Compute a mel spectrogram of an audio file, rescaled to start at 0.

	    The file is loaded with ``torchaudio.load``, turned into a power mel
	    spectrogram (224 mel bins, module-level ``N_FFT`` / ``HOP_LEN``),
	    averaged over its first axis (the channel axis for torchaudio's default
	    channels-first layout), converted to decibels, and finally shifted and
	    scaled so its minimum is 0 and its maximum is 1.

	    Args:
	        filename: Path of the audio file to load.

	    Returns:
	        A tensor holding the normalised spectrogram. When the decibel
	        spectrogram is constant (for example digital silence) the result is
	        all zeros rather than NaN.
	    """
	    audio, sr = torchaudio.load(filename)

	    to_mel = torchaudio.transforms.MelSpectrogram(
	        sample_rate=sr,
	        n_fft=N_FFT,
	        win_length=N_FFT,
	        hop_length=HOP_LEN,
	        center=True,
	        pad_mode="reflect",
	        power=2.0,
	        norm="slaney",
	        onesided=True,
	        n_mels=224,
	        mel_scale="htk",
	    )
	    # Collapse the leading axis so a single spectrogram is produced
	    # regardless of how many channels the file has.
	    specgram = to_mel(audio).mean(axis=0)
	    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

	    # Shift so the smallest value is 0 ...
	    specgram = specgram - specgram.min()
	    # ... then scale so the largest value is 1. A constant spectrogram has a
	    # peak of 0 after the shift; dividing by it would fill the tensor with
	    # NaNs, so the division is skipped in that case.
	    peak = specgram.max()
	    if peak > 0:
	        specgram = specgram / peak

	    return specgram


	def create_image(filename):
	    """Render the spectrogram of an audio file to ``temp.png``.

	    Args:
	        filename: Path of the audio file to convert.

	    Returns:
	        ``Path("temp.png")``, the location the image was written to
	        (relative to the current working directory). Earlier versions
	        returned ``None``; callers that ignore the result are unaffected.
	    """
	    specgram = create_spectrogram(filename)
	    dest = Path("temp.png")
	    # Write through ``dest`` so the output location is defined in one place;
	    # ``str(dest)`` is exactly "temp.png", the path used previously.
	    save_image(specgram, str(dest))
	    return dest


	def predict(img):
	    """Classify a spectrogram image and report a probability per label.

	    Args:
	        img: Anything ``PILImage.create`` accepts; the pipeline in this file
	            passes the path of the rendered spectrogram PNG.

	    Returns:
	        A dict mapping every entry of the module-level ``labels`` to the
	        model's probability for it, as a plain ``float``.
	    """
	    image = PILImage.create(img)
	    # ``model.predict`` yields three values; only the last one (the
	    # per-class probabilities) is used here.
	    _decoded, _class_idx, scores = model.predict(image)

	    genre_scores = {}
	    for position, genre in enumerate(labels):
	        genre_scores[genre] = float(scores[position])
	    return genre_scores


	def end2endpipeline(filename):
	    """Run the full audio-to-prediction pipeline for one file.

	    The audio is first rendered to a spectrogram image on disk via
	    ``create_image``; that image is then classified with ``predict``.

	    Args:
	        filename: Path of the audio file to classify.

	    Returns:
	        The label-to-probability dict produced by ``predict``.
	    """
	    # Step 1: write the spectrogram image to disk.
	    create_image(filename)
	    # Step 2: classify the image that step 1 just wrote.
	    spectrogram_png = "temp.png"
	    return predict(spectrogram_png)


	# Assemble the web UI: an audio input (delivered to the callback as a file
	# path) feeding ``end2endpipeline``, with the five highest-scoring labels
	# displayed as output.
	demo = gradio.Interface(
	    end2endpipeline,
	    gradio.inputs.Audio(
	        source="microphone",
	        type="filepath",
	        label="Record/ Drop audio",
	    ),
	    gradio.outputs.Label(num_top_classes=5),
	    **interface_options,
	)

	# Keyword arguments forwarded verbatim to ``demo.launch``.
	launch_options = dict(enable_queue=True, share=False)

	# Start serving the interface as soon as the script runs.
	demo.launch(**launch_options)