{ "cells": [ { "cell_type": "code", "execution_count": 4, "id": "8973fb4b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com, https://download.pytorch.org/whl/cu113\n", "Requirement already satisfied: torch in /opt/conda/lib/python3.8/site-packages (1.11.0)\n", "Requirement already satisfied: torchvision in /opt/conda/lib/python3.8/site-packages (0.12.0a0)\n", "Requirement already satisfied: torchaudio in /opt/conda/lib/python3.8/site-packages (0.11.0)\n", "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.8/site-packages (from torch) (4.0.1)\n", "Collecting torchvision\n", " Downloading https://download.pytorch.org/whl/cu113/torchvision-0.12.0%2Bcu113-cp38-cp38-linux_x86_64.whl (22.3 MB)\n", "\u001b[K |████████████████████████████████| 22.3 MB 1.3 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/conda/lib/python3.8/site-packages (from torchvision) (9.0.0)\n", "Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from torchvision) (1.22.2)\n", "Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from torchvision) (2.26.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (3.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (1.26.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (2021.10.8)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (2.0.9)\n", "Installing collected packages: torchvision\n", " Attempting uninstall: torchvision\n", " Found existing installation: torchvision 0.12.0a0\n", " Uninstalling 
torchvision-0.12.0a0:\n", " Successfully uninstalled torchvision-0.12.0a0\n", "Successfully installed torchvision-0.12.0+cu113\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n" ] } ], "source": [ "! pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113" ] }, { "cell_type": "code", "execution_count": 12, "id": "bf7451ce", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Requirement already satisfied: torchvision in /opt/conda/lib/python3.8/site-packages (0.12.0+cu113)\n", "Requirement already satisfied: torch==1.11.0 in /opt/conda/lib/python3.8/site-packages (from torchvision) (1.11.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/conda/lib/python3.8/site-packages (from torchvision) (9.0.0)\n", "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.8/site-packages (from torchvision) (4.0.1)\n", "Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from torchvision) (1.22.2)\n", "Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from torchvision) (2.26.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (1.26.7)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (2.0.9)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (2021.10.8)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->torchvision) (3.1)\n", "\u001b[33mWARNING: 
Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n" ] } ], "source": [ "! pip install torchvision" ] }, { "cell_type": "code", "execution_count": 13, "id": "90037405", "metadata": {}, "outputs": [], "source": [ "import torchaudio\n", "from fastai.vision.all import *\n", "from torchvision.utils import save_image" ] }, { "cell_type": "code", "execution_count": 4, "id": "cf93c763", "metadata": {}, "outputs": [], "source": [ "base_folder = Path('../input/kaggle-pog-series-s01e02')\n", "\n", "items = get_files(base_folder, extensions='.ogg')" ] }, { "cell_type": "code", "execution_count": 6, "id": "93f3e24d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(#24985) [Path('../input/kaggle-pog-series-s01e02/test/000003.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000006.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000008.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000011.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000017.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000023.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000024.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000031.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000032.ogg'),Path('../input/kaggle-pog-series-s01e02/test/000036.ogg')...]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "items\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "13c68e01", "metadata": {}, "outputs": [], "source": [
"N_FFT = 2048  # FFT size; also passed as the window length\n",
"HOP_LEN = 1024  # hop between successive frames, in samples\n",
"N_MELS = 224  # number of mel filterbank bins\n",
"\n",
"\n",
"def create_spectrogram(filename):\n",
"    \"\"\"Load an audio file and return its mel spectrogram scaled to [0, 1].\n",
"\n",
"    The audio is read with ``torchaudio.load``, turned into a power mel\n",
"    spectrogram, averaged over the first axis (channels, with the default\n",
"    ``torchaudio.load`` layout), converted to decibels and min-max\n",
"    normalised.\n",
"\n",
"    Args:\n",
"        filename: path of the audio file to read.\n",
"\n",
"    Returns:\n",
"        The normalised spectrogram tensor. If the dB spectrogram is constant\n",
"        (for example a silent clip) the result is all zeros instead of NaN.\n",
"    \"\"\"\n",
"    audio, sr = torchaudio.load(filename)\n",
"    # Built per call because the sample rate comes from the file itself.\n",
"    to_mel = torchaudio.transforms.MelSpectrogram(\n",
"        sample_rate=sr,\n",
"        n_fft=N_FFT,\n",
"        win_length=N_FFT,\n",
"        hop_length=HOP_LEN,\n",
"        center=True,\n",
"        pad_mode=\"reflect\",\n",
"        power=2.0,\n",
"        norm='slaney',\n",
"        onesided=True,\n",
"        n_mels=N_MELS,\n",
"        mel_scale=\"htk\",\n",
"    )\n",
"    specgram = to_mel(audio).mean(dim=0)\n",
"    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)\n",
"    specgram = specgram - specgram.min()\n",
"    # After the shift the minimum is 0, so a peak of 0 means the spectrogram\n",
"    # is constant; dividing by it would turn every value into 0/0 = NaN.\n",
"    peak = specgram.max()\n",
"    if peak > 0:\n",
"        specgram = specgram / peak\n",
"    return specgram"
 ] }, { "cell_type": "code", "execution_count": 8, "id": "630a2a63", "metadata": {}, "outputs": [], "source": [ "filename = items[2]\n", "spec_default = create_spectrogram(filename)" ] }, { "cell_type": "code", "execution_count": 9, "id": "bd857529", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Path('../input/kaggle-pog-series-s01e02/test/000008.ogg')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filename" ] }, { "cell_type": "code", "execution_count": 11, "id": "1eae215f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.imshow(spec_default)" ] }, { "cell_type": "code", "execution_count": 22, "id": "5a0afd6f", "metadata": {}, "outputs": [], "source": [
"def create_image(filename, dest=\"temp.png\"):\n",
"    \"\"\"Write the spectrogram of an audio file to disk as an image.\n",
"\n",
"    Args:\n",
"        filename: path of the audio file, forwarded to ``create_spectrogram``.\n",
"        dest: destination handed to ``save_image``; defaults to ``temp.png``\n",
"            in the current working directory.\n",
"    \"\"\"\n",
"    specgram = create_spectrogram(filename)\n",
"    save_image(specgram, dest)"
 ] }, { "cell_type": "code", "execution_count": 23, "id": "c52d69d2", "metadata": {}, "outputs": [], "source": [ "create_image(filename)" ] }, { "cell_type": "code", "execution_count": 24, "id": "ad35918d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PytorchAudioInference.ipynb music-genre-spectrogram-pogchamps\t temp.png\n", "kaggle-pog-series-s01e02 music-genre-torch-melspec-generator.log\n" ] } ], "source": [ "! ls" ] }, { "cell_type": "code", "execution_count": 26, "id": "daf3215e", "metadata": {}, "outputs": [], "source": [
"# NOTE: load_learner unpickles the file, which can run arbitrary code;\n",
"# only load model files that come from a trusted source.\n",
"learn = load_learner(\"music-genre-spectrogram-pogchamps/spectograms/model.pkl\")"
 ] }, { "cell_type": "code", "execution_count": 31, "id": "c990969f", "metadata": {}, "outputs": [], "source": [ "labels = learn.dls.vocab" ] }, { "cell_type": "code", "execution_count": 32, "id": "ebfefcd3", "metadata": {}, "outputs": [], "source": [
"def predict(img):\n",
"    \"\"\"Run the loaded learner on an image and return a probability per label.\n",
"\n",
"    Args:\n",
"        img: anything ``PILImage.create`` accepts, e.g. the path of a PNG.\n",
"\n",
"    Returns:\n",
"        dict mapping every entry of ``labels`` to its probability as a float.\n",
"    \"\"\"\n",
"    img = PILImage.create(img)\n",
"    # learn.predict returns three values; only the probabilities are used.\n",
"    _pred, _pred_idx, probs = learn.predict(img)\n",
"    return {label: float(probs[i]) for i, label in enumerate(labels)}"
 ] }, { "cell_type": "code", "execution_count": 33, "id": "11b17142", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'Ambient Electronic': 0.018784182146191597,\n", " 'Blues': 0.001689370721578598,\n", " 'Chiptune / Glitch': 0.009157774038612843,\n", " 
'Classical': 0.0018330742605030537,\n", " 'Country': 0.015161271207034588,\n", " 'Easy Listening': 0.000761857838369906,\n", " 'Electronic': 0.043093256652355194,\n", " 'Experimental': 0.01893473044037819,\n", " 'Folk': 0.03622647374868393,\n", " 'Hip-Hop': 0.012909098528325558,\n", " 'Instrumental': 0.03738876059651375,\n", " 'International': 0.007503754459321499,\n", " 'Jazz': 0.002992472844198346,\n", " 'Old-Time / Historic': 0.0014046949800103903,\n", " 'Pop': 0.14049866795539856,\n", " 'Punk': 0.1848350614309311,\n", " 'Rock': 0.4632216989994049,\n", " 'Soul-RnB': 0.002242171438410878,\n", " 'Spoken': 0.0013616250362247229}" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict(\"temp.png\")" ] }, { "cell_type": "code", "execution_count": null, "id": "63aa6dd6", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }