{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Import Dependancies" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Load Data Files" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "dir = \"SpotGenTrack/Data Sources/\"\n", "albums = pd.read_csv(dir + \"spotify_albums.csv\")\n", "artists = pd.read_csv(dir + \"spotify_artists.csv\")\n", "tracks = pd.read_csv(dir + \"spotify_tracks.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0album_typeartist_idavailable_marketsexternal_urlshrefidimagesnamerelease_daterelease_date_precisiontotal_trackstrack_idtrack_name_prevuritype
00single3DiDSECUqqY1AuBP8qtaIa['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...{'spotify': 'https://open.spotify.com/album/1g...https://api.spotify.com/v1/albums/1gAM7M4rBwEb...1gAM7M4rBwEbSPeAQR2nx1[{'height': 640, 'url': 'https://i.scdn.co/ima...If I Ain't Got You EP2019-02-08day62iejTMy9XZ8Gaae0aQ2yl0track_32spotify:album:1gAM7M4rBwEbSPeAQR2nx1album
11album6s1pCNXcbdtQJlsnM1hRIA['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...{'spotify': 'https://open.spotify.com/album/4K...https://api.spotify.com/v1/albums/4KfJZV7WfolY...4KfJZV7WfolYlxBzOTo66s[{'height': 640, 'url': 'https://i.scdn.co/ima...Shostakovich Symphony No.5 - Four Romances on ...2019-03-01day81WQfghEjszJJ4H8MAWrQ2Ctrack_11spotify:album:4KfJZV7WfolYlxBzOTo66salbum
22single5YjfNaHq05WrwldRe1QSBc['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...{'spotify': 'https://open.spotify.com/album/7n...https://api.spotify.com/v1/albums/7nLYY7uAVUb5...7nLYY7uAVUb57kpd7tZxnS[{'height': 640, 'url': 'https://i.scdn.co/ima...Take My Bass2019-03-14day13jJKj4QTK3v18ZSwpk7AcVtrack_15spotify:album:7nLYY7uAVUb57kpd7tZxnSalbum
33single2G9Vc16JCpnZmK4uGH46Fa['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...{'spotify': 'https://open.spotify.com/album/6p...https://api.spotify.com/v1/albums/6p20Rt4x2Qn5...6p20Rt4x2Qn5mUMRi1s6pj[{'height': 640, 'url': 'https://i.scdn.co/ima...Hypnotizing (Are U)2016-11-16day11xGtDafUZbHyYC3Xarcbrjtrack_46spotify:album:6p20Rt4x2Qn5mUMRi1s6pjalbum
44single2dwM9OcE4c3Ph1UBINSodx['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...{'spotify': 'https://open.spotify.com/album/1X...https://api.spotify.com/v1/albums/1XeoOqC1q7U2...1XeoOqC1q7U2iyLEQJ64cu[{'height': 640, 'url': 'https://i.scdn.co/ima...Sunshine2018-07-20day10gWtsXvXOzAT6FtM3ur8intrack_10spotify:album:1XeoOqC1q7U2iyLEQJ64cualbum
\n", "
" ], "text/plain": [ " Unnamed: 0 album_type artist_id \\\n", "0 0 single 3DiDSECUqqY1AuBP8qtaIa \n", "1 1 album 6s1pCNXcbdtQJlsnM1hRIA \n", "2 2 single 5YjfNaHq05WrwldRe1QSBc \n", "3 3 single 2G9Vc16JCpnZmK4uGH46Fa \n", "4 4 single 2dwM9OcE4c3Ph1UBINSodx \n", "\n", " available_markets \\\n", "0 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n", "1 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n", "2 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n", "3 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n", "4 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n", "\n", " external_urls \\\n", "0 {'spotify': 'https://open.spotify.com/album/1g... \n", "1 {'spotify': 'https://open.spotify.com/album/4K... \n", "2 {'spotify': 'https://open.spotify.com/album/7n... \n", "3 {'spotify': 'https://open.spotify.com/album/6p... \n", "4 {'spotify': 'https://open.spotify.com/album/1X... \n", "\n", " href id \\\n", "0 https://api.spotify.com/v1/albums/1gAM7M4rBwEb... 1gAM7M4rBwEbSPeAQR2nx1 \n", "1 https://api.spotify.com/v1/albums/4KfJZV7WfolY... 4KfJZV7WfolYlxBzOTo66s \n", "2 https://api.spotify.com/v1/albums/7nLYY7uAVUb5... 7nLYY7uAVUb57kpd7tZxnS \n", "3 https://api.spotify.com/v1/albums/6p20Rt4x2Qn5... 6p20Rt4x2Qn5mUMRi1s6pj \n", "4 https://api.spotify.com/v1/albums/1XeoOqC1q7U2... 1XeoOqC1q7U2iyLEQJ64cu \n", "\n", " images \\\n", "0 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n", "1 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n", "2 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n", "3 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n", "4 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n", "\n", " name release_date \\\n", "0 If I Ain't Got You EP 2019-02-08 \n", "1 Shostakovich Symphony No.5 - Four Romances on ... 2019-03-01 \n", "2 Take My Bass 2019-03-14 \n", "3 Hypnotizing (Are U) 2016-11-16 \n", "4 Sunshine 2018-07-20 \n", "\n", " release_date_precision total_tracks track_id \\\n", "0 day 6 2iejTMy9XZ8Gaae0aQ2yl0 \n", "1 day 8 1WQfghEjszJJ4H8MAWrQ2C \n", "2 day 1 3jJKj4QTK3v18ZSwpk7AcV \n", "3 day 1 1xGtDafUZbHyYC3Xarcbrj \n", "4 day 1 0gWtsXvXOzAT6FtM3ur8in \n", "\n", " track_name_prev uri type \n", "0 track_32 spotify:album:1gAM7M4rBwEbSPeAQR2nx1 album \n", "1 track_11 spotify:album:4KfJZV7WfolYlxBzOTo66s album \n", "2 track_15 spotify:album:7nLYY7uAVUb57kpd7tZxnS album \n", "3 track_46 spotify:album:6p20Rt4x2Qn5mUMRi1s6pj album \n", "4 track_10 spotify:album:1XeoOqC1q7U2iyLEQJ64cu album " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "Index(['Unnamed: 0', 'album_type', 'artist_id', 'available_markets',\n", " 'external_urls', 'href', 'id', 'images', 'name', 'release_date',\n", " 'release_date_precision', 'total_tracks', 'track_id', 'track_name_prev',\n", " 'uri', 'type'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect albums\n", "display(albums.head())\n", "\n", "# Show columns\n", "albums.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0artist_popularityfollowersgenresidnametrack_idtrack_name_prevtype
004423230['sertanejo', 'sertanejo pop', 'sertanejo trad...4mGnpjhqgx4RUdsIJiURdoJuliano Cezar0wmDmAILuW9e2aRttkl4aCtrack_9artist
1122313[]1dLnVku4VQUOLswwDFvRc9The Grenadines4wqwj0gA8qPZKLl5WVqXmltrack_30artist
22261596['danish pop rock']6YVY310fjfUzKi8hiqR7iKGangway1bFqWDbvHmZe2f4Nf9qaD8track_38artist
3331149['uk alternative pop']2VElyouiCfoYPDJluzwJwKFADES3MFSUBAidPzRBbIS7BDj1Strack_34artist
442111['french baroque']4agVy03qW8juSysCTUOuDIJean-Pierre Guignon2r3q57FhxdsCyYr0kuDq4btrack_26artist
\n", "
" ], "text/plain": [ " Unnamed: 0 artist_popularity followers \\\n", "0 0 44 23230 \n", "1 1 22 313 \n", "2 2 26 1596 \n", "3 3 31 149 \n", "4 4 21 11 \n", "\n", " genres id \\\n", "0 ['sertanejo', 'sertanejo pop', 'sertanejo trad... 4mGnpjhqgx4RUdsIJiURdo \n", "1 [] 1dLnVku4VQUOLswwDFvRc9 \n", "2 ['danish pop rock'] 6YVY310fjfUzKi8hiqR7iK \n", "3 ['uk alternative pop'] 2VElyouiCfoYPDJluzwJwK \n", "4 ['french baroque'] 4agVy03qW8juSysCTUOuDI \n", "\n", " name track_id track_name_prev type \n", "0 Juliano Cezar 0wmDmAILuW9e2aRttkl4aC track_9 artist \n", "1 The Grenadines 4wqwj0gA8qPZKLl5WVqXml track_30 artist \n", "2 Gangway 1bFqWDbvHmZe2f4Nf9qaD8 track_38 artist \n", "3 FADES 3MFSUBAidPzRBbIS7BDj1S track_34 artist \n", "4 Jean-Pierre Guignon 2r3q57FhxdsCyYr0kuDq4b track_26 artist " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "Index(['Unnamed: 0', 'artist_popularity', 'followers', 'genres', 'id', 'name',\n", " 'track_id', 'track_name_prev', 'type'],\n", " dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect artists\n", "display(artists.head())\n", "\n", "# Show columns\n", "artists.columns" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0acousticnessalbum_idanalysis_urlartists_idavailable_marketscountrydanceabilitydisc_numberduration_ms...preview_urlspeechinesstempotime_signaturetrack_hreftrack_name_prevtrack_numberurivalencetype
000.2940D3QufeCudpQANOR7luqdrhttps://api.spotify.com/v1/audio-analysis/5qlj...['3mxJuHRn2ZWD5OofvJtDZY']['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...BE0.6981.0235584.0...https://p.scdn.co/mp3-preview/1b05a902da3a251d...0.0262115.0184.0https://api.spotify.com/v1/tracks/5qljLQuKnNJf...track_141.0spotify:track:5qljLQuKnNJf4F4vfxQB0V0.6220track
110.8631bcqsH5UyTBzmh9YizdsBEhttps://api.spotify.com/v1/audio-analysis/3VAX...['4xWMewm6CYMstu0sPgd9jJ']['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...BE0.7191.0656960.0...https://p.scdn.co/mp3-preview/d8140736a6131cb5...0.9220115.0753.0https://api.spotify.com/v1/tracks/3VAX2MJdmdqA...track_33.0spotify:track:3VAX2MJdmdqARLSU5hPMpm0.5890track
220.7504tKijjmxGClg4JOLAyo2qEhttps://api.spotify.com/v1/audio-analysis/1L3Y...['3hYaK5FF3YAglCj5HZgBnP']['GB']BE0.4661.0492840.0...https://p.scdn.co/mp3-preview/c8af28fb15185b18...0.944079.5654.0https://api.spotify.com/v1/tracks/1L3YAhsEMrGV...track_44.0spotify:track:1L3YAhsEMrGVvCgDXj2TYn0.0850track
330.7636FeJF5r8roonnKraJxr4oBhttps://api.spotify.com/v1/audio-analysis/6aCe...['2KQsUB9DRBcJk17JWX1eXD']['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...BE0.7191.0316578.0...https://p.scdn.co/mp3-preview/7629b8e9f31f6e9b...0.9380112.8223.0https://api.spotify.com/v1/tracks/6aCe9zzoZmCo...track_91.0spotify:track:6aCe9zzoZmCojX7bbgKKtf0.5330track
440.7704tKijjmxGClg4JOLAyo2qEhttps://api.spotify.com/v1/audio-analysis/1Vo8...['3hYaK5FF3YAglCj5HZgBnP']['GB']BE0.4601.0558880.0...https://p.scdn.co/mp3-preview/32be593c0eb82868...0.943081.2604.0https://api.spotify.com/v1/tracks/1Vo802A38tPF...track_22.0spotify:track:1Vo802A38tPFHmje1h91um0.0906track
\n", "

5 rows × 32 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 acousticness album_id \\\n", "0 0 0.294 0D3QufeCudpQANOR7luqdr \n", "1 1 0.863 1bcqsH5UyTBzmh9YizdsBE \n", "2 2 0.750 4tKijjmxGClg4JOLAyo2qE \n", "3 3 0.763 6FeJF5r8roonnKraJxr4oB \n", "4 4 0.770 4tKijjmxGClg4JOLAyo2qE \n", "\n", " analysis_url \\\n", "0 https://api.spotify.com/v1/audio-analysis/5qlj... \n", "1 https://api.spotify.com/v1/audio-analysis/3VAX... \n", "2 https://api.spotify.com/v1/audio-analysis/1L3Y... \n", "3 https://api.spotify.com/v1/audio-analysis/6aCe... \n", "4 https://api.spotify.com/v1/audio-analysis/1Vo8... \n", "\n", " artists_id \\\n", "0 ['3mxJuHRn2ZWD5OofvJtDZY'] \n", "1 ['4xWMewm6CYMstu0sPgd9jJ'] \n", "2 ['3hYaK5FF3YAglCj5HZgBnP'] \n", "3 ['2KQsUB9DRBcJk17JWX1eXD'] \n", "4 ['3hYaK5FF3YAglCj5HZgBnP'] \n", "\n", " available_markets country danceability \\\n", "0 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... BE 0.698 \n", "1 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... BE 0.719 \n", "2 ['GB'] BE 0.466 \n", "3 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... BE 0.719 \n", "4 ['GB'] BE 0.460 \n", "\n", " disc_number duration_ms ... \\\n", "0 1.0 235584.0 ... \n", "1 1.0 656960.0 ... \n", "2 1.0 492840.0 ... \n", "3 1.0 316578.0 ... \n", "4 1.0 558880.0 ... \n", "\n", " preview_url speechiness tempo \\\n", "0 https://p.scdn.co/mp3-preview/1b05a902da3a251d... 0.0262 115.018 \n", "1 https://p.scdn.co/mp3-preview/d8140736a6131cb5... 0.9220 115.075 \n", "2 https://p.scdn.co/mp3-preview/c8af28fb15185b18... 0.9440 79.565 \n", "3 https://p.scdn.co/mp3-preview/7629b8e9f31f6e9b... 0.9380 112.822 \n", "4 https://p.scdn.co/mp3-preview/32be593c0eb82868... 0.9430 81.260 \n", "\n", " time_signature track_href \\\n", "0 4.0 https://api.spotify.com/v1/tracks/5qljLQuKnNJf... \n", "1 3.0 https://api.spotify.com/v1/tracks/3VAX2MJdmdqA... \n", "2 4.0 https://api.spotify.com/v1/tracks/1L3YAhsEMrGV... \n", "3 3.0 https://api.spotify.com/v1/tracks/6aCe9zzoZmCo... \n", "4 4.0 https://api.spotify.com/v1/tracks/1Vo802A38tPF... \n", "\n", " track_name_prev track_number uri \\\n", "0 track_14 1.0 spotify:track:5qljLQuKnNJf4F4vfxQB0V \n", "1 track_3 3.0 spotify:track:3VAX2MJdmdqARLSU5hPMpm \n", "2 track_4 4.0 spotify:track:1L3YAhsEMrGVvCgDXj2TYn \n", "3 track_9 1.0 spotify:track:6aCe9zzoZmCojX7bbgKKtf \n", "4 track_2 2.0 spotify:track:1Vo802A38tPFHmje1h91um \n", "\n", " valence type \n", "0 0.6220 track \n", "1 0.5890 track \n", "2 0.0850 track \n", "3 0.5330 track \n", "4 0.0906 track \n", "\n", "[5 rows x 32 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "Index(['Unnamed: 0', 'acousticness', 'album_id', 'analysis_url', 'artists_id',\n", " 'available_markets', 'country', 'danceability', 'disc_number',\n", " 'duration_ms', 'energy', 'href', 'id', 'instrumentalness', 'key',\n", " 'liveness', 'loudness', 'lyrics', 'mode', 'name', 'playlist',\n", " 'popularity', 'preview_url', 'speechiness', 'tempo', 'time_signature',\n", " 'track_href', 'track_name_prev', 'track_number', 'uri', 'valence',\n", " 'type'],\n", " dtype='object')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect tracks\n", "display(tracks.head())\n", "\n", "# Show columns\n", "tracks.columns" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "## Combine three files into \n", "# drop irrelevant columns\n", "# get only tracks after 1990\n", "def join_genre_and_date(artist_df, album_df, track_df):\n", " album = album_df.rename(columns={'id':\"album_id\"}).set_index('album_id')\n", " artist = artist_df.rename(columns={'id':\"artists_id\",'name':\"artists_name\"}).set_index('artists_id')\n", " track = track_df.set_index('album_id').join(album['release_date'], on='album_id' )\n", " track.artists_id = track.artists_id.apply(lambda x: x[2:-2])\n", " track = track.set_index('artists_id').join(artist[['artists_name','genres']], on='artists_id' )\n", " track.reset_index(drop=False, inplace=True)\n", " track['release_year'] = pd.to_datetime(track.release_date).dt.year\n", " track.drop(columns = ['Unnamed: 0','country','track_name_prev','track_number','type'], inplace = True)\n", " \n", " return track" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def get_filtered_track_df(df, genres_to_include):\n", " df['genres'] = df.genres.apply(lambda x: [i[1:-1] for i in str(x)[1:-1].split(\", \")])\n", " df_exploded = df.explode(\"genres\")[df.explode(\"genres\")[\"genres\"].isin(genres_to_include)]\n", " df_exploded.loc[df_exploded[\"genres\"]==\"korean pop\", \"genres\"] = \"k-pop\"\n", " df_exploded_indices = list(df_exploded.index.unique())\n", " df = df[df.index.isin(df_exploded_indices)]\n", " df = df.reset_index(drop=True)\n", " \n", " return df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "track_with_year_and_genre = join_genre_and_date(artists, albums, tracks)\n", "genres_to_include = genres = ['dance pop', 'electronic', 'electropop', 'hip hop', 'jazz', 'k-pop', 'latin', 'pop', 'pop rap', 'r&b', 'rock']\n", "filtered_track_df = get_filtered_track_df(track_with_year_and_genre, genres_to_include)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "filtered_track_df[\"uri\"] = filtered_track_df[\"uri\"].str.replace(\"spotify:track:\", \"\")\n", "filtered_track_df = filtered_track_df.drop(columns=['analysis_url', 'available_markets'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
artists_idacousticnessdanceabilitydisc_numberduration_msenergyhrefidinstrumentalnesskey...speechinesstempotime_signaturetrack_hrefurivalencerelease_dateartists_namegenresrelease_year
068WwJXWrpo1yVOOIZjLSeT0.02680.5061.0248777.00.741https://api.spotify.com/v1/tracks/0UATU9OJxh4m...0UATU9OJxh4m3fwDljdGZn0.0000271.0...0.034994.0424.0https://api.spotify.com/v1/tracks/0UATU9OJxh4m...0UATU9OJxh4m3fwDljdGZn0.2362018-09-28Evalyn[electropop, indie electro-pop, indie poptimis...2018
109xj0S68Y1OU1vHMCZAIvz0.50500.4871.0171573.00.297https://api.spotify.com/v1/tracks/4JH1M62gVDND...4JH1M62gVDNDhDAUiQB3Qv0.00005211.0...0.0915185.9123.0https://api.spotify.com/v1/tracks/4JH1M62gVDND...4JH1M62gVDNDhDAUiQB3Qv0.2892001-08-21Café Tacvba[latin, latin alternative, latin rock, mexican...2001
26pSsE5y0uJMwYj83KrPyf90.13300.6291.0207396.00.706https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU...0h7Ld5CvgzaUN1zA3tdyPq0.0000001.0...0.436081.2204.0https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU...0h7Ld5CvgzaUN1zA3tdyPq0.5432019-01-25Dawn Richard[alternative r&b, deep pop r&b, escape room, h...2019
37slfeZO9LsJbWgpkIoXBUJ0.40600.5901.0279000.00.597https://api.spotify.com/v1/tracks/4S1bYWrLOC8s...4S1bYWrLOC8smuy8kJzxKQ0.0000239.0...0.0275121.0514.0https://api.spotify.com/v1/tracks/4S1bYWrLOC8s...4S1bYWrLOC8smuy8kJzxKQ0.4661995-09-12Ricky Martin[dance pop, latin, latin pop, mexican pop, pop...1995
409hVIj6vWgoCDtT03h8ZCa0.03160.7271.0218773.00.380https://api.spotify.com/v1/tracks/758mQT4zzlvB...758mQT4zzlvBhy9PvNePwC0.0000007.0...0.335092.0504.0https://api.spotify.com/v1/tracks/758mQT4zzlvB...758mQT4zzlvBhy9PvNePwC0.4551991-09-24A Tribe Called Quest[alternative hip hop, conscious hip hop, east ...1991
\n", "

5 rows × 28 columns

\n", "
" ], "text/plain": [ " artists_id acousticness danceability disc_number \\\n", "0 68WwJXWrpo1yVOOIZjLSeT 0.0268 0.506 1.0 \n", "1 09xj0S68Y1OU1vHMCZAIvz 0.5050 0.487 1.0 \n", "2 6pSsE5y0uJMwYj83KrPyf9 0.1330 0.629 1.0 \n", "3 7slfeZO9LsJbWgpkIoXBUJ 0.4060 0.590 1.0 \n", "4 09hVIj6vWgoCDtT03h8ZCa 0.0316 0.727 1.0 \n", "\n", " duration_ms energy href \\\n", "0 248777.0 0.741 https://api.spotify.com/v1/tracks/0UATU9OJxh4m... \n", "1 171573.0 0.297 https://api.spotify.com/v1/tracks/4JH1M62gVDND... \n", "2 207396.0 0.706 https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU... \n", "3 279000.0 0.597 https://api.spotify.com/v1/tracks/4S1bYWrLOC8s... \n", "4 218773.0 0.380 https://api.spotify.com/v1/tracks/758mQT4zzlvB... \n", "\n", " id instrumentalness key ... speechiness tempo \\\n", "0 0UATU9OJxh4m3fwDljdGZn 0.000027 1.0 ... 0.0349 94.042 \n", "1 4JH1M62gVDNDhDAUiQB3Qv 0.000052 11.0 ... 0.0915 185.912 \n", "2 0h7Ld5CvgzaUN1zA3tdyPq 0.000000 1.0 ... 0.4360 81.220 \n", "3 4S1bYWrLOC8smuy8kJzxKQ 0.000023 9.0 ... 0.0275 121.051 \n", "4 758mQT4zzlvBhy9PvNePwC 0.000000 7.0 ... 0.3350 92.050 \n", "\n", " time_signature track_href \\\n", "0 4.0 https://api.spotify.com/v1/tracks/0UATU9OJxh4m... \n", "1 3.0 https://api.spotify.com/v1/tracks/4JH1M62gVDND... \n", "2 4.0 https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU... \n", "3 4.0 https://api.spotify.com/v1/tracks/4S1bYWrLOC8s... \n", "4 4.0 https://api.spotify.com/v1/tracks/758mQT4zzlvB... \n", "\n", " uri valence release_date artists_name \\\n", "0 0UATU9OJxh4m3fwDljdGZn 0.236 2018-09-28 Evalyn \n", "1 4JH1M62gVDNDhDAUiQB3Qv 0.289 2001-08-21 Café Tacvba \n", "2 0h7Ld5CvgzaUN1zA3tdyPq 0.543 2019-01-25 Dawn Richard \n", "3 4S1bYWrLOC8smuy8kJzxKQ 0.466 1995-09-12 Ricky Martin \n", "4 758mQT4zzlvBhy9PvNePwC 0.455 1991-09-24 A Tribe Called Quest \n", "\n", " genres release_year \n", "0 [electropop, indie electro-pop, indie poptimis... 2018 \n", "1 [latin, latin alternative, latin rock, mexican... 2001 \n", "2 [alternative r&b, deep pop r&b, escape room, h... 2019 \n", "3 [dance pop, latin, latin pop, mexican pop, pop... 1995 \n", "4 [alternative hip hop, conscious hip hop, east ... 1991 \n", "\n", "[5 rows x 28 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "Index(['artists_id', 'acousticness', 'danceability', 'disc_number',\n", " 'duration_ms', 'energy', 'href', 'id', 'instrumentalness', 'key',\n", " 'liveness', 'loudness', 'lyrics', 'mode', 'name', 'playlist',\n", " 'popularity', 'preview_url', 'speechiness', 'tempo', 'time_signature',\n", " 'track_href', 'uri', 'valence', 'release_date', 'artists_name',\n", " 'genres', 'release_year'],\n", " dtype='object')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "display(filtered_track_df.head())\n", "filtered_track_df.columns" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "filtered_track_df.to_csv(\"clean_data.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0acousticnessdanceabilitydisc_numberduration_msenergyinstrumentalnesskeylivenessloudnessmodepopularityspeechinesstempotime_signaturetrack_numbervalence
count101939.000000101939.000000101939.000000101939.0000001.019390e+05101939.000000101939.000000101939.000000101939.000000101939.000000101939.000000101939.000000101939.000000101939.000000101939.000000101939.000000101939.000000
mean50969.0000000.3521240.5860151.0321662.467708e+050.5864790.1487765.2708580.197640-9.4627200.61815439.7823110.128841118.3585273.8756514.6080600.482813
std29427.3988830.3348550.1777240.5667891.904303e+050.2601700.3040243.5776790.1753916.1985080.48584116.7907690.20332430.2240740.5170087.1818050.261690
min0.0000000.0000000.0000001.0000001.155000e+030.0000000.0000000.0000000.000000-60.0000000.0000000.0000000.0000000.0000000.0000001.0000000.000000
25%25484.5000000.0407000.4800001.0000001.840000e+050.4110000.0000002.0000000.095600-11.1490000.00000029.0000000.03640095.9730004.0000001.0000000.271000
50%50969.0000000.2380000.6100001.0000002.168930e+050.6290000.0000375.0000000.124000-7.5990001.00000041.0000000.050600118.0670004.0000002.0000000.477000
75%76453.5000000.6450000.7140001.0000002.610550e+050.7980000.0344008.0000000.241000-5.5090001.00000052.0000000.104000136.0450004.0000006.0000000.693000
max101938.0000000.9960000.98900081.0000005.505831e+061.0000001.00000011.0000000.9990002.7190001.00000097.0000000.969000244.0350005.000000655.0000000.993000
\n", "
" ], "text/plain": [ " Unnamed: 0 acousticness danceability disc_number \\\n", "count 101939.000000 101939.000000 101939.000000 101939.000000 \n", "mean 50969.000000 0.352124 0.586015 1.032166 \n", "std 29427.398883 0.334855 0.177724 0.566789 \n", "min 0.000000 0.000000 0.000000 1.000000 \n", "25% 25484.500000 0.040700 0.480000 1.000000 \n", "50% 50969.000000 0.238000 0.610000 1.000000 \n", "75% 76453.500000 0.645000 0.714000 1.000000 \n", "max 101938.000000 0.996000 0.989000 81.000000 \n", "\n", " duration_ms energy instrumentalness key \\\n", "count 1.019390e+05 101939.000000 101939.000000 101939.000000 \n", "mean 2.467708e+05 0.586479 0.148776 5.270858 \n", "std 1.904303e+05 0.260170 0.304024 3.577679 \n", "min 1.155000e+03 0.000000 0.000000 0.000000 \n", "25% 1.840000e+05 0.411000 0.000000 2.000000 \n", "50% 2.168930e+05 0.629000 0.000037 5.000000 \n", "75% 2.610550e+05 0.798000 0.034400 8.000000 \n", "max 5.505831e+06 1.000000 1.000000 11.000000 \n", "\n", " liveness loudness mode popularity \\\n", "count 101939.000000 101939.000000 101939.000000 101939.000000 \n", "mean 0.197640 -9.462720 0.618154 39.782311 \n", "std 0.175391 6.198508 0.485841 16.790769 \n", "min 0.000000 -60.000000 0.000000 0.000000 \n", "25% 0.095600 -11.149000 0.000000 29.000000 \n", "50% 0.124000 -7.599000 1.000000 41.000000 \n", "75% 0.241000 -5.509000 1.000000 52.000000 \n", "max 0.999000 2.719000 1.000000 97.000000 \n", "\n", " speechiness tempo time_signature track_number \\\n", "count 101939.000000 101939.000000 101939.000000 101939.000000 \n", "mean 0.128841 118.358527 3.875651 4.608060 \n", "std 0.203324 30.224074 0.517008 7.181805 \n", "min 0.000000 0.000000 0.000000 1.000000 \n", "25% 0.036400 95.973000 4.000000 1.000000 \n", "50% 0.050600 118.067000 4.000000 2.000000 \n", "75% 0.104000 136.045000 4.000000 6.000000 \n", "max 0.969000 244.035000 5.000000 655.000000 \n", "\n", " valence \n", "count 101939.000000 \n", "mean 0.482813 \n", "std 0.261690 \n", "min 0.000000 \n", "25% 0.271000 \n", "50% 0.477000 \n", "75% 0.693000 \n", "max 0.993000 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tracks.describe()" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "vscode": { "interpreter": { "hash": "fcea6937c5a281949bbc174b5880db8814ebdb6fc47b05585d01c7da350dd15c" } } }, "nbformat": 4, "nbformat_minor": 2 }