{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      " %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "README.md \u001b[34mmodel_checkpoints\u001b[m\u001b[m recommender.py\n",
      "app.py \u001b[34mnotebooks\u001b[m\u001b[m requirements.txt\n",
      "\u001b[34mdata\u001b[m\u001b[m orchestrate_audio_data.py \u001b[34msrc\u001b[m\u001b[m\n"
     ]
    }
   ],
   "source": [
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import librosa\n",
    "import torch\n",
    "from src import laion_clap\n",
    "from glob import glob\n",
    "import pandas as pd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = laion_clap.CLAP_Module(enable_fusion=False, amodel='HTSAT-base')\n",
    "model.load_ckpt(ckpt=\"music_audioset_epoch_15_esc_90.14.pt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "SAMPLE_RATE = 48000  # sample rate should be 48000\n",
    "\n",
    "\n",
    "def load_music_file(file_name, sample_rate=SAMPLE_RATE, clap_model=None):\n",
    "    \"\"\"Return the CLAP audio embedding of a single audio file.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file_name : str\n",
    "        Path of the audio file; it is decoded with ``librosa.load``.\n",
    "    sample_rate : int, optional\n",
    "        Rate the audio is resampled to while loading (default 48000).\n",
    "    clap_model : optional\n",
    "        Object exposing ``get_audio_embedding_from_data``. When omitted,\n",
    "        the notebook-level ``model`` is looked up at call time.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Whatever ``get_audio_embedding_from_data(x=..., use_tensor=False)``\n",
    "    returns for a batch that holds this one clip.\n",
    "    \"\"\"\n",
    "    if clap_model is None:\n",
    "        clap_model = model  # notebook-level CLAP module\n",
    "    audio_data, _ = librosa.load(file_name, sr=sample_rate)\n",
    "    # Make it (1, T): a batch containing a single clip.\n",
    "    audio_data = audio_data.reshape(1, -1)\n",
    "    # No int16 quantisation round-trip is applied; the float waveform goes\n",
    "    # to the model exactly as loaded.\n",
    "    with torch.no_grad():\n",
    "        return clap_model.get_audio_embedding_from_data(x=audio_data, use_tensor=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "TRACKS_GLOB = \"/Users/berkayg/Codes/music-project/AudioCLIP/data/downloaded_tracks/*.wav\"\n",
    "# glob() yields paths in arbitrary order; sort so that row i of every\n",
    "# array derived from music_files refers to the same track on each run.\n",
    "music_files = sorted(glob(TRACKS_GLOB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
"outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "# SECURITY: pickle.load can execute arbitrary code while deserialising;\n",
    "# only open pickle files that you produced yourself.\n",
    "# NOTE(review): the name `ls` is kept because other cells may read it;\n",
    "# a more descriptive name would be clearer.\n",
    "with open(\"/Users/berkayg/Codes/music-project/laion-clap-project/curate-me-a-playlist/data/vectors/song_names.pkl\", \"rb\") as reader:\n",
    "    ls = pickle.load(reader)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/sr/r72219hj06x_1xvw7hhd517h0000gn/T/ipykernel_18860/3009710654.py:2: UserWarning: PySoundFile failed. Trying audioread instead.\n",
      " audio_data, _ = librosa.load(file_name, sr=48000) # sample rate should be 48000\n",
      "/Users/berkayg/miniforge3/envs/playlist-curator/lib/python3.10/site-packages/librosa/core/audio.py:183: FutureWarning: librosa.core.audio.__audioread_load\n",
      "\tDeprecated as of librosa version 0.10.0.\n",
      "\tIt will be removed in librosa version 1.0.\n",
      " y, sr_native = __audioread_load(path, offset, duration, dtype)\n"
     ]
    }
   ],
   "source": [
    "EMBED_DIM = 512  # embedding width; the text embedding printed below is (1, 512)\n",
    "\n",
    "# One row per track, filled in the same order as music_files.\n",
    "music_data = np.zeros((len(music_files), EMBED_DIM), dtype=np.float32)\n",
    "for track_idx, track_path in enumerate(music_files):\n",
    "    # load_music_file embeds a batch of one clip; flatten it explicitly\n",
    "    # instead of relying on broadcasting during row assignment.\n",
    "    music_data[track_idx] = np.asarray(load_music_file(track_path)).reshape(-1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 512)\n"
     ]
    }
   ],
   "source": [
    "text_data = [\"This audio is a romantic song\"]\n",
    "text_embed = model.get_text_embedding(text_data)\n",
    "print(text_embed.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# File name (last path component) of every track, aligned with music_files.\n",
    "song_names = [track_path.split(\"/\")[-1] for track_path in music_files]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([100, 1])\n"
     ]
    }
   ],
   "source": [
    "with torch.no_grad():\n",
    "    # (n_tracks, 512) @ (512, n_prompts): one score column per text prompt.\n",
    "    # as_tensor avoids copying NumPy data and also accepts tensors as-is.\n",
    "    ranking = torch.as_tensor(music_data) @ torch.as_tensor(text_embed).t()\n",
    "    # Keep the first prompt's scores only, as an (n_tracks, 1) column.\n",
    "    ranking = ranking[:, 0].reshape(-1, 1)\n",
    "print(ranking.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
"
\n", " | This audio is a romantic song | \n", "
---|---|
Coldplay - Charlie Brown.wav | \n", "0.400684 | \n", "
Sam Smith - I'm Not The Only One.wav | \n", "0.373561 | \n", "
Pink Floyd - The Great Gig In The Sky - 2011 Remastered Version.wav | \n", "0.371584 | \n", "
Christina Aguilera - You Lost Me.wav | \n", "0.370390 | \n", "
Lana Del Rey - Yayo.wav | \n", "0.370379 | \n", "
Queen - It's A Hard Life - Remastered 2011.wav | \n", "0.348699 | \n", "
Teoman - Haziran.wav | \n", "0.331220 | \n", "
John Lennon - Imagine - Remastered 2010.wav | \n", "0.330397 | \n", "
Sleeping At Last - Mars.wav | \n", "0.328770 | \n", "
Adele - Someone Like You.wav | \n", "0.325650 | \n", "
Coldplay - What If.wav | \n", "0.315717 | \n", "
Adamlar - Orda Ortada.wav | \n", "0.306465 | \n", "
Eric Clapton - Autumn Leaves.wav | \n", "0.305451 | \n", "
Premiata Forneria Marconi - Impressioni di settembre.wav | \n", "0.295878 | \n", "
Guthrie Govan - Lost in Rio.wav | \n", "0.284883 | \n", "