rxxnzz committed on
Commit
6dcc0b2
·
verified ·
1 Parent(s): 42a9159

Upload 22 files

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.cache ADDED
@@ -0,0 +1 @@
 
 
1
+ {"access_token": "BQD363D2V0DfTblE6WIENGJmMsHEwAK6fQ2iitJsDHaE6q-likgolb74XQUSq1EH4YC_1MsegenNN3ysZdjQT2SSXbXPbrlxWMvUiQWg-PxmX66T6NM", "token_type": "Bearer", "expires_in": 3600, "expires_at": 1719793255}
.cache22 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"access_token": "BQCCSbvf9rV9on-exkGuwqveHa6SLR-wuLUwXJfApJQXQ9AKN7TYOPaaotvikq3nFJJ1aI_IPiSm3-Mi-vpd9W_NcwNJbj09ALJEpIEo8oW_5LZXNVI", "token_type": "Bearer", "expires_in": 3600, "expires_at": 1719792377}
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/data_clean.csv filter=lfs diff=lfs merge=lfs -text
37
+ data/data_model.csv filter=lfs diff=lfs merge=lfs -text
38
+ data/data.csv filter=lfs diff=lfs merge=lfs -text
ModellingPSD.ipynb ADDED
@@ -0,0 +1,1182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {
7
+ "id": "K6Ek26jtEbzM"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "import pandas as pd\n",
12
+ "import ast\n",
13
+ "\n",
14
+ "from sklearn.compose import ColumnTransformer\n",
15
+ "from sklearn.pipeline import Pipeline\n",
16
+ "from sklearn.preprocessing import StandardScaler\n",
17
+ "from sklearn.cluster import KMeans\n",
18
+ "from sklearn.feature_extraction import FeatureHasher\n",
19
+ "\n",
20
+ "from scipy.spatial.distance import cdist\n",
21
+ "import numpy as np"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 2,
27
+ "metadata": {
28
+ "id": "jSok_FhgFQPa"
29
+ },
30
+ "outputs": [],
31
+ "source": [
32
+ "df = pd.read_csv('data/data_clean.csv')"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 3,
38
+ "metadata": {
39
+ "id": "fjlzGblwZiyl"
40
+ },
41
+ "outputs": [],
42
+ "source": [
43
+ "df['artists'] = df['artists'].apply(ast.literal_eval)"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "metadata": {
50
+ "colab": {
51
+ "base_uri": "https://localhost:8080/"
52
+ },
53
+ "id": "JHmNLOXKFqEE",
54
+ "outputId": "6536ca09-4f23-4563-f4b3-99385f4272ec"
55
+ },
56
+ "outputs": [
57
+ {
58
+ "data": {
59
+ "text/plain": [
60
+ "Index(['valence', 'year', 'acousticness', 'artists', 'danceability',\n",
61
+ " 'duration_ms', 'energy', 'id', 'instrumentalness', 'key', 'liveness',\n",
62
+ " 'loudness', 'mode', 'name', 'popularity', 'speechiness', 'tempo'],\n",
63
+ " dtype='object')"
64
+ ]
65
+ },
66
+ "execution_count": 4,
67
+ "metadata": {},
68
+ "output_type": "execute_result"
69
+ }
70
+ ],
71
+ "source": [
72
+ "df.columns"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 5,
78
+ "metadata": {
79
+ "id": "27JWURjxZiyl"
80
+ },
81
+ "outputs": [],
82
+ "source": [
83
+ "DF_COLUMNS = ['valence', 'year', 'acousticness', 'artists', 'danceability',\n",
84
+ " 'duration_ms', 'energy', 'id', 'instrumentalness', 'key',\n",
85
+ " 'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',\n",
86
+ " 'speechiness', 'tempo']\n",
87
+ "DF_COLUMNS_MODEL = ['valence', 'year', 'acousticness','artists', 'danceability',\n",
88
+ " 'duration_ms', 'energy', 'instrumentalness', 'key',\n",
89
+ " 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 6,
95
+ "metadata": {
96
+ "id": "PasM5_-tZiyl"
97
+ },
98
+ "outputs": [],
99
+ "source": [
100
+ "\n",
101
+ "# Column to be hashed\n",
102
+ "hash_columns = 'artists'\n",
103
+ "\n",
104
+ "# Initialize the FeatureHasher for the selected column\n",
105
+ "hasher = FeatureHasher(n_features=10, input_type=\"string\")\n",
106
+ "\n",
107
+ "# Initialize the StandardScaler\n",
108
+ "scaler = StandardScaler()\n",
109
+ "\n",
110
+ "# Initialize KMeans\n",
111
+ "kmeans = KMeans(n_clusters=10)\n",
112
+ "\n",
113
+ "# Define the transformer that hashes the selected column\n",
114
+ "hash_transformer = ('hasher', hasher, hash_columns)\n",
115
+ "\n",
116
+ "# Define the transformer for the non-hashed columns (unused: the ColumnTransformer below relies on remainder='passthrough' instead)\n",
117
+ "remainder_transformer = ('passthrough', 'remainder', slice(0, 0))\n",
118
+ "\n",
119
+ "# Combine the transformers for all columns\n",
120
+ "preprocessor = ColumnTransformer(\n",
121
+ " transformers=[hash_transformer],\n",
122
+ " remainder='passthrough')\n",
123
+ "\n",
124
+ "# Combine the preprocessor with the clustering algorithm (KMeans)\n",
125
+ "pipeline = Pipeline([\n",
126
+ " ('preprocessor', preprocessor),\n",
127
+ " ('scaler', scaler),\n",
128
+ " ('kmeans', kmeans)\n",
129
+ "])"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "execution_count": 26,
135
+ "metadata": {
136
+ "colab": {
137
+ "base_uri": "https://localhost:8080/",
138
+ "height": 283
139
+ },
140
+ "id": "8GPu5AWiZiym",
141
+ "outputId": "2d8d35c1-a5ba-4504-87f8-98cae876fe90"
142
+ },
143
+ "outputs": [
144
+ {
145
+ "data": {
146
+ "text/html": [
147
+ "<style>#sk-container-id-2 {\n",
148
+ " /* Definition of color scheme common for light and dark mode */\n",
149
+ " --sklearn-color-text: black;\n",
150
+ " --sklearn-color-line: gray;\n",
151
+ " /* Definition of color scheme for unfitted estimators */\n",
152
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
153
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
154
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
155
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
156
+ " /* Definition of color scheme for fitted estimators */\n",
157
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
158
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
159
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
160
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
161
+ "\n",
162
+ " /* Specific color for light theme */\n",
163
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
164
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
165
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
166
+ " --sklearn-color-icon: #696969;\n",
167
+ "\n",
168
+ " @media (prefers-color-scheme: dark) {\n",
169
+ " /* Redefinition of color scheme for dark theme */\n",
170
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
171
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
172
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
173
+ " --sklearn-color-icon: #878787;\n",
174
+ " }\n",
175
+ "}\n",
176
+ "\n",
177
+ "#sk-container-id-2 {\n",
178
+ " color: var(--sklearn-color-text);\n",
179
+ "}\n",
180
+ "\n",
181
+ "#sk-container-id-2 pre {\n",
182
+ " padding: 0;\n",
183
+ "}\n",
184
+ "\n",
185
+ "#sk-container-id-2 input.sk-hidden--visually {\n",
186
+ " border: 0;\n",
187
+ " clip: rect(1px 1px 1px 1px);\n",
188
+ " clip: rect(1px, 1px, 1px, 1px);\n",
189
+ " height: 1px;\n",
190
+ " margin: -1px;\n",
191
+ " overflow: hidden;\n",
192
+ " padding: 0;\n",
193
+ " position: absolute;\n",
194
+ " width: 1px;\n",
195
+ "}\n",
196
+ "\n",
197
+ "#sk-container-id-2 div.sk-dashed-wrapped {\n",
198
+ " border: 1px dashed var(--sklearn-color-line);\n",
199
+ " margin: 0 0.4em 0.5em 0.4em;\n",
200
+ " box-sizing: border-box;\n",
201
+ " padding-bottom: 0.4em;\n",
202
+ " background-color: var(--sklearn-color-background);\n",
203
+ "}\n",
204
+ "\n",
205
+ "#sk-container-id-2 div.sk-container {\n",
206
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
207
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
208
+ " so we also need the `!important` here to be able to override the\n",
209
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
210
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
211
+ " display: inline-block !important;\n",
212
+ " position: relative;\n",
213
+ "}\n",
214
+ "\n",
215
+ "#sk-container-id-2 div.sk-text-repr-fallback {\n",
216
+ " display: none;\n",
217
+ "}\n",
218
+ "\n",
219
+ "div.sk-parallel-item,\n",
220
+ "div.sk-serial,\n",
221
+ "div.sk-item {\n",
222
+ " /* draw centered vertical line to link estimators */\n",
223
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
224
+ " background-size: 2px 100%;\n",
225
+ " background-repeat: no-repeat;\n",
226
+ " background-position: center center;\n",
227
+ "}\n",
228
+ "\n",
229
+ "/* Parallel-specific style estimator block */\n",
230
+ "\n",
231
+ "#sk-container-id-2 div.sk-parallel-item::after {\n",
232
+ " content: \"\";\n",
233
+ " width: 100%;\n",
234
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
235
+ " flex-grow: 1;\n",
236
+ "}\n",
237
+ "\n",
238
+ "#sk-container-id-2 div.sk-parallel {\n",
239
+ " display: flex;\n",
240
+ " align-items: stretch;\n",
241
+ " justify-content: center;\n",
242
+ " background-color: var(--sklearn-color-background);\n",
243
+ " position: relative;\n",
244
+ "}\n",
245
+ "\n",
246
+ "#sk-container-id-2 div.sk-parallel-item {\n",
247
+ " display: flex;\n",
248
+ " flex-direction: column;\n",
249
+ "}\n",
250
+ "\n",
251
+ "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
252
+ " align-self: flex-end;\n",
253
+ " width: 50%;\n",
254
+ "}\n",
255
+ "\n",
256
+ "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
257
+ " align-self: flex-start;\n",
258
+ " width: 50%;\n",
259
+ "}\n",
260
+ "\n",
261
+ "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
262
+ " width: 0;\n",
263
+ "}\n",
264
+ "\n",
265
+ "/* Serial-specific style estimator block */\n",
266
+ "\n",
267
+ "#sk-container-id-2 div.sk-serial {\n",
268
+ " display: flex;\n",
269
+ " flex-direction: column;\n",
270
+ " align-items: center;\n",
271
+ " background-color: var(--sklearn-color-background);\n",
272
+ " padding-right: 1em;\n",
273
+ " padding-left: 1em;\n",
274
+ "}\n",
275
+ "\n",
276
+ "\n",
277
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
278
+ "clickable and can be expanded/collapsed.\n",
279
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
280
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
281
+ "*/\n",
282
+ "\n",
283
+ "/* Pipeline and ColumnTransformer style (default) */\n",
284
+ "\n",
285
+ "#sk-container-id-2 div.sk-toggleable {\n",
286
+ " /* Default theme specific background. It is overwritten whether we have a\n",
287
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
288
+ " background-color: var(--sklearn-color-background);\n",
289
+ "}\n",
290
+ "\n",
291
+ "/* Toggleable label */\n",
292
+ "#sk-container-id-2 label.sk-toggleable__label {\n",
293
+ " cursor: pointer;\n",
294
+ " display: block;\n",
295
+ " width: 100%;\n",
296
+ " margin-bottom: 0;\n",
297
+ " padding: 0.5em;\n",
298
+ " box-sizing: border-box;\n",
299
+ " text-align: center;\n",
300
+ "}\n",
301
+ "\n",
302
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
303
+ " /* Arrow on the left of the label */\n",
304
+ " content: \"▸\";\n",
305
+ " float: left;\n",
306
+ " margin-right: 0.25em;\n",
307
+ " color: var(--sklearn-color-icon);\n",
308
+ "}\n",
309
+ "\n",
310
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
311
+ " color: var(--sklearn-color-text);\n",
312
+ "}\n",
313
+ "\n",
314
+ "/* Toggleable content - dropdown */\n",
315
+ "\n",
316
+ "#sk-container-id-2 div.sk-toggleable__content {\n",
317
+ " max-height: 0;\n",
318
+ " max-width: 0;\n",
319
+ " overflow: hidden;\n",
320
+ " text-align: left;\n",
321
+ " /* unfitted */\n",
322
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
323
+ "}\n",
324
+ "\n",
325
+ "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
326
+ " /* fitted */\n",
327
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
328
+ "}\n",
329
+ "\n",
330
+ "#sk-container-id-2 div.sk-toggleable__content pre {\n",
331
+ " margin: 0.2em;\n",
332
+ " border-radius: 0.25em;\n",
333
+ " color: var(--sklearn-color-text);\n",
334
+ " /* unfitted */\n",
335
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
336
+ "}\n",
337
+ "\n",
338
+ "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
339
+ " /* unfitted */\n",
340
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
341
+ "}\n",
342
+ "\n",
343
+ "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
344
+ " /* Expand drop-down */\n",
345
+ " max-height: 200px;\n",
346
+ " max-width: 100%;\n",
347
+ " overflow: auto;\n",
348
+ "}\n",
349
+ "\n",
350
+ "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
351
+ " content: \"▾\";\n",
352
+ "}\n",
353
+ "\n",
354
+ "/* Pipeline/ColumnTransformer-specific style */\n",
355
+ "\n",
356
+ "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
357
+ " color: var(--sklearn-color-text);\n",
358
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
359
+ "}\n",
360
+ "\n",
361
+ "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
362
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
363
+ "}\n",
364
+ "\n",
365
+ "/* Estimator-specific style */\n",
366
+ "\n",
367
+ "/* Colorize estimator box */\n",
368
+ "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
369
+ " /* unfitted */\n",
370
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
371
+ "}\n",
372
+ "\n",
373
+ "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
374
+ " /* fitted */\n",
375
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
376
+ "}\n",
377
+ "\n",
378
+ "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
379
+ "#sk-container-id-2 div.sk-label label {\n",
380
+ " /* The background is the default theme color */\n",
381
+ " color: var(--sklearn-color-text-on-default-background);\n",
382
+ "}\n",
383
+ "\n",
384
+ "/* On hover, darken the color of the background */\n",
385
+ "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
386
+ " color: var(--sklearn-color-text);\n",
387
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
388
+ "}\n",
389
+ "\n",
390
+ "/* Label box, darken color on hover, fitted */\n",
391
+ "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
392
+ " color: var(--sklearn-color-text);\n",
393
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
394
+ "}\n",
395
+ "\n",
396
+ "/* Estimator label */\n",
397
+ "\n",
398
+ "#sk-container-id-2 div.sk-label label {\n",
399
+ " font-family: monospace;\n",
400
+ " font-weight: bold;\n",
401
+ " display: inline-block;\n",
402
+ " line-height: 1.2em;\n",
403
+ "}\n",
404
+ "\n",
405
+ "#sk-container-id-2 div.sk-label-container {\n",
406
+ " text-align: center;\n",
407
+ "}\n",
408
+ "\n",
409
+ "/* Estimator-specific */\n",
410
+ "#sk-container-id-2 div.sk-estimator {\n",
411
+ " font-family: monospace;\n",
412
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
413
+ " border-radius: 0.25em;\n",
414
+ " box-sizing: border-box;\n",
415
+ " margin-bottom: 0.5em;\n",
416
+ " /* unfitted */\n",
417
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
418
+ "}\n",
419
+ "\n",
420
+ "#sk-container-id-2 div.sk-estimator.fitted {\n",
421
+ " /* fitted */\n",
422
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
423
+ "}\n",
424
+ "\n",
425
+ "/* on hover */\n",
426
+ "#sk-container-id-2 div.sk-estimator:hover {\n",
427
+ " /* unfitted */\n",
428
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
429
+ "}\n",
430
+ "\n",
431
+ "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
432
+ " /* fitted */\n",
433
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
434
+ "}\n",
435
+ "\n",
436
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
437
+ "\n",
438
+ "/* Common style for \"i\" and \"?\" */\n",
439
+ "\n",
440
+ ".sk-estimator-doc-link,\n",
441
+ "a:link.sk-estimator-doc-link,\n",
442
+ "a:visited.sk-estimator-doc-link {\n",
443
+ " float: right;\n",
444
+ " font-size: smaller;\n",
445
+ " line-height: 1em;\n",
446
+ " font-family: monospace;\n",
447
+ " background-color: var(--sklearn-color-background);\n",
448
+ " border-radius: 1em;\n",
449
+ " height: 1em;\n",
450
+ " width: 1em;\n",
451
+ " text-decoration: none !important;\n",
452
+ " margin-left: 1ex;\n",
453
+ " /* unfitted */\n",
454
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
455
+ " color: var(--sklearn-color-unfitted-level-1);\n",
456
+ "}\n",
457
+ "\n",
458
+ ".sk-estimator-doc-link.fitted,\n",
459
+ "a:link.sk-estimator-doc-link.fitted,\n",
460
+ "a:visited.sk-estimator-doc-link.fitted {\n",
461
+ " /* fitted */\n",
462
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
463
+ " color: var(--sklearn-color-fitted-level-1);\n",
464
+ "}\n",
465
+ "\n",
466
+ "/* On hover */\n",
467
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
468
+ ".sk-estimator-doc-link:hover,\n",
469
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
470
+ ".sk-estimator-doc-link:hover {\n",
471
+ " /* unfitted */\n",
472
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
473
+ " color: var(--sklearn-color-background);\n",
474
+ " text-decoration: none;\n",
475
+ "}\n",
476
+ "\n",
477
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
478
+ ".sk-estimator-doc-link.fitted:hover,\n",
479
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
480
+ ".sk-estimator-doc-link.fitted:hover {\n",
481
+ " /* fitted */\n",
482
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
483
+ " color: var(--sklearn-color-background);\n",
484
+ " text-decoration: none;\n",
485
+ "}\n",
486
+ "\n",
487
+ "/* Span, style for the box shown on hovering the info icon */\n",
488
+ ".sk-estimator-doc-link span {\n",
489
+ " display: none;\n",
490
+ " z-index: 9999;\n",
491
+ " position: relative;\n",
492
+ " font-weight: normal;\n",
493
+ " right: .2ex;\n",
494
+ " padding: .5ex;\n",
495
+ " margin: .5ex;\n",
496
+ " width: min-content;\n",
497
+ " min-width: 20ex;\n",
498
+ " max-width: 50ex;\n",
499
+ " color: var(--sklearn-color-text);\n",
500
+ " box-shadow: 2pt 2pt 4pt #999;\n",
501
+ " /* unfitted */\n",
502
+ " background: var(--sklearn-color-unfitted-level-0);\n",
503
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
504
+ "}\n",
505
+ "\n",
506
+ ".sk-estimator-doc-link.fitted span {\n",
507
+ " /* fitted */\n",
508
+ " background: var(--sklearn-color-fitted-level-0);\n",
509
+ " border: var(--sklearn-color-fitted-level-3);\n",
510
+ "}\n",
511
+ "\n",
512
+ ".sk-estimator-doc-link:hover span {\n",
513
+ " display: block;\n",
514
+ "}\n",
515
+ "\n",
516
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
517
+ "\n",
518
+ "#sk-container-id-2 a.estimator_doc_link {\n",
519
+ " float: right;\n",
520
+ " font-size: 1rem;\n",
521
+ " line-height: 1em;\n",
522
+ " font-family: monospace;\n",
523
+ " background-color: var(--sklearn-color-background);\n",
524
+ " border-radius: 1rem;\n",
525
+ " height: 1rem;\n",
526
+ " width: 1rem;\n",
527
+ " text-decoration: none;\n",
528
+ " /* unfitted */\n",
529
+ " color: var(--sklearn-color-unfitted-level-1);\n",
530
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
531
+ "}\n",
532
+ "\n",
533
+ "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
534
+ " /* fitted */\n",
535
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
536
+ " color: var(--sklearn-color-fitted-level-1);\n",
537
+ "}\n",
538
+ "\n",
539
+ "/* On hover */\n",
540
+ "#sk-container-id-2 a.estimator_doc_link:hover {\n",
541
+ " /* unfitted */\n",
542
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
543
+ " color: var(--sklearn-color-background);\n",
544
+ " text-decoration: none;\n",
545
+ "}\n",
546
+ "\n",
547
+ "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
548
+ " /* fitted */\n",
549
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
550
+ "}\n",
551
+ "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
552
+ " ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
553
+ " transformers=[(&#x27;hasher&#x27;,\n",
554
+ " FeatureHasher(input_type=&#x27;string&#x27;,\n",
555
+ " n_features=10),\n",
556
+ " &#x27;artists&#x27;)])),\n",
557
+ " (&#x27;scaler&#x27;, StandardScaler()),\n",
558
+ " (&#x27;kmeans&#x27;, KMeans(n_clusters=10))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
559
+ " ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
560
+ " transformers=[(&#x27;hasher&#x27;,\n",
561
+ " FeatureHasher(input_type=&#x27;string&#x27;,\n",
562
+ " n_features=10),\n",
563
+ " &#x27;artists&#x27;)])),\n",
564
+ " (&#x27;scaler&#x27;, StandardScaler()),\n",
565
+ " (&#x27;kmeans&#x27;, KMeans(n_clusters=10))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
566
+ " transformers=[(&#x27;hasher&#x27;,\n",
567
+ " FeatureHasher(input_type=&#x27;string&#x27;,\n",
568
+ " n_features=10),\n",
569
+ " &#x27;artists&#x27;)])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">hasher</label><div class=\"sk-toggleable__content fitted\"><pre>artists</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;FeatureHasher<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.feature_extraction.FeatureHasher.html\">?<span>Documentation for FeatureHasher</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>FeatureHasher(input_type=&#x27;string&#x27;, n_features=10)</pre></div> </div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">remainder</label><div class=\"sk-toggleable__content fitted\"><pre>[&#x27;valence&#x27;, &#x27;year&#x27;, &#x27;acousticness&#x27;, &#x27;danceability&#x27;, &#x27;duration_ms&#x27;, &#x27;energy&#x27;, &#x27;instrumentalness&#x27;, &#x27;key&#x27;, &#x27;liveness&#x27;, &#x27;loudness&#x27;, &#x27;mode&#x27;, &#x27;popularity&#x27;, &#x27;speechiness&#x27;, &#x27;tempo&#x27;]</pre></div> </div></div><div 
class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">passthrough</label><div class=\"sk-toggleable__content fitted\"><pre>passthrough</pre></div> </div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;KMeans<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.cluster.KMeans.html\">?<span>Documentation for KMeans</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>KMeans(n_clusters=10)</pre></div> </div></div></div></div></div></div>"
570
+ ],
571
+ "text/plain": [
572
+ "Pipeline(steps=[('preprocessor',\n",
573
+ " ColumnTransformer(remainder='passthrough',\n",
574
+ " transformers=[('hasher',\n",
575
+ " FeatureHasher(input_type='string',\n",
576
+ " n_features=10),\n",
577
+ " 'artists')])),\n",
578
+ " ('scaler', StandardScaler()),\n",
579
+ " ('kmeans', KMeans(n_clusters=10))])"
580
+ ]
581
+ },
582
+ "execution_count": 26,
583
+ "metadata": {},
584
+ "output_type": "execute_result"
585
+ }
586
+ ],
587
+ "source": [
588
+ "pipeline.fit(df[DF_COLUMNS_MODEL])"
589
+ ]
590
+ },
591
+ {
592
+ "cell_type": "code",
593
+ "execution_count": 27,
594
+ "metadata": {},
595
+ "outputs": [
596
+ {
597
+ "data": {
598
+ "text/plain": [
599
+ "['model/pipeline.pkl']"
600
+ ]
601
+ },
602
+ "execution_count": 27,
603
+ "metadata": {},
604
+ "output_type": "execute_result"
605
+ }
606
+ ],
607
+ "source": [
608
+ "# export pipeline\n",
609
+ "import joblib\n",
610
+ "joblib.dump(pipeline, 'model/pipeline.pkl')"
611
+ ]
612
+ },
613
+ {
614
+ "cell_type": "code",
615
+ "execution_count": 28,
616
+ "metadata": {
617
+ "id": "vjExO0EvpCsT"
618
+ },
619
+ "outputs": [],
620
+ "source": [
621
+ "import yaml\n",
622
+ "import spotipy\n",
623
+ "from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials\n",
624
+ "\n",
625
+ "stream= open(\"streamlit/spotify/spotify.yaml\")\n",
626
+ "spotify_details = yaml.safe_load(stream)\n",
627
+ "auth_manager = SpotifyClientCredentials(client_id=spotify_details['Client_id'],\n",
628
+ " client_secret=spotify_details['client_secret'])\n",
629
+ "\n",
630
+ "sp = spotipy.client.Spotify(auth_manager=auth_manager)"
631
+ ]
632
+ },
633
+ {
634
+ "cell_type": "code",
635
+ "execution_count": 29,
636
+ "metadata": {
637
+ "id": "ZTyFQkq0y7-J"
638
+ },
639
+ "outputs": [],
640
+ "source": [
641
def get_song_spotify(song_name, data):
    """Resolve a song through the Spotify search API and return its feature row.

    The first search hit for ``song_name`` is used.  If a row with the same
    Spotify id exists in ``data`` that row is returned; otherwise the audio
    features are fetched from Spotify and assembled into a new row whose
    columns follow ``DF_COLUMNS``.

    Args:
        song_name: Free-text query passed to ``sp.search``.
        data: DataFrame searched by its ``'id'`` column before falling back
            to the Spotify audio-features endpoint.

    Returns:
        A pandas Series describing the song, or ``None`` when the search
        call fails, the search returns no tracks, or the audio features
        cannot be retrieved.
    """
    # 'w' mode is kept from the original (the log is truncated on every
    # call); the context manager guarantees the handle is closed on every
    # return path, which the bare open() never did.
    with open('log.txt', 'w') as log:
        try:
            result = sp.search(q=song_name, limit=1)
        except Exception:
            log.write('Error: Failed to search song')
            return None

        items = result['tracks']['items']
        if not items:
            return None

        song = items[0]
        track_id = song['id']  # renamed from `id` to stop shadowing the builtin

        # Prefer the row already present in the local dataset.  Any failure
        # here (no match, missing column, ...) deliberately falls through to
        # the Spotify lookup, as in the original.
        try:
            return data[data['id'] == track_id].iloc[0]
        except Exception:
            pass

        try:
            # audio_features returns a list; only the first entry is used.
            features = sp.audio_features(track_id)[0]
            song_data = {
                'valence': features['valence'],
                # NOTE(review): year is kept as the 4-character string prefix
                # of release_date, not converted to int -- confirm downstream.
                "year" : song['album']['release_date'][:4],
                'acousticness': features['acousticness'],
                'artists': [artist['name'] for artist in song['artists']],
                'danceability': features['danceability'],
                'duration_ms': features['duration_ms'],
                'energy': features['energy'],
                'id': track_id,
                'instrumentalness': features['instrumentalness'],
                'key': features['key'],
                'liveness': features['liveness'],
                'loudness': features['loudness'],
                'mode': features['mode'],
                'name': song['name'],
                'popularity': song['popularity'],
                'speechiness': features['speechiness'],
                'tempo': features['tempo']
            }
        except Exception:
            log.write('Error: Failed to get audio features from Spotify')
            return None

        return pd.DataFrame([song_data], columns=DF_COLUMNS).iloc[0]
686
+ ]
687
+ },
688
+ {
689
+ "cell_type": "code",
690
+ "execution_count": 30,
691
+ "metadata": {
692
+ "id": "j2u3jFFly-aI"
693
+ },
694
+ "outputs": [],
695
+ "source": [
696
def get_song_data(song_name, data):
    """Return the feature row for ``song_name``.

    The local dataset is searched first by exact match on its ``'name'``
    column and the first matching row is returned.  When there is no match
    (or the column is missing) the lookup is delegated to
    ``get_song_spotify``.

    Args:
        song_name: Exact song title to look for.
        data: DataFrame to search.

    Returns:
        A pandas Series for the song, or whatever ``get_song_spotify``
        returns (possibly ``None``) when the song is not in ``data``.
    """
    try:
        return data[data['name'] == song_name].iloc[0]
    except (IndexError, KeyError):
        # IndexError: no row matched; KeyError: no 'name' column.  Narrowed
        # from a bare `except:` so interrupts and unrelated errors surface.
        return get_song_spotify(song_name, data)
702
+ ]
703
+ },
704
+ {
705
+ "cell_type": "code",
706
+ "execution_count": 31,
707
+ "metadata": {
708
+ "id": "C0rZKlkq2pyH"
709
+ },
710
+ "outputs": [],
711
+ "source": [
712
def df_song_data(list_song_name,data):
    """Build a DataFrame with one feature row per resolvable song.

    Each name is resolved with ``get_song_data``.  Songs that cannot be
    resolved (``None`` result) are skipped instead of being appended as an
    empty row.

    Args:
        list_song_name: Iterable of song titles.
        data: DataFrame handed to ``get_song_data`` for the local lookup.

    Returns:
        A DataFrame with columns ``DF_COLUMNS``; it has no rows when none of
        the songs could be resolved.
    """
    rows_song_data = []
    for song_name in list_song_name:
        row = get_song_data(song_name, data)
        if row is not None:
            rows_song_data.append(row)

    return pd.DataFrame(rows_song_data, columns=DF_COLUMNS)
718
+ ]
719
+ },
720
+ {
721
+ "cell_type": "code",
722
+ "execution_count": 32,
723
+ "metadata": {
724
+ "id": "aJ2LCdi-SvMv"
725
+ },
726
+ "outputs": [],
727
+ "source": [
728
+ "\n",
729
def songs_recommendation(list_song_name,data,num_rec=10):
    """Recommend songs close to the given songs within their predicted cluster.

    The input songs are transformed with the fitted pipeline's preprocessor
    and scaler, averaged into one vector, and assigned to a k-means cluster.
    The rows of ``data`` in that cluster (excluding the input songs
    themselves) are ranked by ``cdist`` distance to the averaged vector.

    Args:
        list_song_name: Song titles to base the recommendation on.
        data: DataFrame of candidate songs.  It is masked with the fitted
            ``kmeans.labels_``, so it must be row-aligned with the frame the
            pipeline was fitted on.
        num_rec: Maximum number of recommendations to return.

    Returns:
        ``(song_data_input, recsongs)``: the resolved input songs and the
        recommended rows of ``data``, nearest first.  ``recsongs`` is empty
        when no input song could be resolved or the cluster has no other
        songs.
    """
    # Resolve the input songs once (the original did it twice, doubling any
    # Spotify calls) and against `data`, not the global `df`.
    song_data_input = df_song_data(list_song_name, data)
    if song_data_input.empty:
        return song_data_input, data.iloc[0:0]

    preprocessor = pipeline.named_steps["preprocessor"]
    scaler = pipeline.named_steps["scaler"]
    kmeans = pipeline.named_steps["kmeans"]

    vector = scaler.transform(preprocessor.transform(song_data_input))
    vector = vector.mean(axis=0)

    predicted_cluster = kmeans.predict([vector])

    cluster_data = data[kmeans.labels_ == predicted_cluster[0]]
    # Drop the input songs before ranking; filtering after the top-N cut
    # returned fewer than num_rec rows.
    cluster_data = cluster_data[~cluster_data['name'].isin(list_song_name)]
    if cluster_data.empty:
        return song_data_input, cluster_data

    vector_cluster = scaler.transform(preprocessor.transform(cluster_data))

    distance = cdist([vector], vector_cluster)
    index = list(np.argsort(distance)[:, :num_rec][0])

    recsongs = cluster_data.iloc[index]
    return song_data_input, recsongs
751
+ ]
752
+ },
753
+ {
754
+ "cell_type": "code",
755
+ "execution_count": 33,
756
+ "metadata": {
757
+ "id": "RtmQZUDBK6uw"
758
+ },
759
+ "outputs": [],
760
+ "source": [
761
+ "input,output = songs_recommendation(['505'],df)"
762
+ ]
763
+ },
764
+ {
765
+ "cell_type": "code",
766
+ "execution_count": 34,
767
+ "metadata": {
768
+ "colab": {
769
+ "base_uri": "https://localhost:8080/",
770
+ "height": 118
771
+ },
772
+ "id": "W8ZxWwofXA35",
773
+ "outputId": "cbd93a6a-ea55-42bf-91b8-5cbd6f8b8015"
774
+ },
775
+ "outputs": [
776
+ {
777
+ "data": {
778
+ "text/html": [
779
+ "<div>\n",
780
+ "<style scoped>\n",
781
+ " .dataframe tbody tr th:only-of-type {\n",
782
+ " vertical-align: middle;\n",
783
+ " }\n",
784
+ "\n",
785
+ " .dataframe tbody tr th {\n",
786
+ " vertical-align: top;\n",
787
+ " }\n",
788
+ "\n",
789
+ " .dataframe thead th {\n",
790
+ " text-align: right;\n",
791
+ " }\n",
792
+ "</style>\n",
793
+ "<table border=\"1\" class=\"dataframe\">\n",
794
+ " <thead>\n",
795
+ " <tr style=\"text-align: right;\">\n",
796
+ " <th></th>\n",
797
+ " <th>valence</th>\n",
798
+ " <th>year</th>\n",
799
+ " <th>acousticness</th>\n",
800
+ " <th>artists</th>\n",
801
+ " <th>danceability</th>\n",
802
+ " <th>duration_ms</th>\n",
803
+ " <th>energy</th>\n",
804
+ " <th>id</th>\n",
805
+ " <th>instrumentalness</th>\n",
806
+ " <th>key</th>\n",
807
+ " <th>liveness</th>\n",
808
+ " <th>loudness</th>\n",
809
+ " <th>mode</th>\n",
810
+ " <th>name</th>\n",
811
+ " <th>popularity</th>\n",
812
+ " <th>release_date</th>\n",
813
+ " <th>speechiness</th>\n",
814
+ " <th>tempo</th>\n",
815
+ " </tr>\n",
816
+ " </thead>\n",
817
+ " <tbody>\n",
818
+ " <tr>\n",
819
+ " <th>17006</th>\n",
820
+ " <td>0.234</td>\n",
821
+ " <td>2007</td>\n",
822
+ " <td>0.00237</td>\n",
823
+ " <td>[Arctic Monkeys]</td>\n",
824
+ " <td>0.52</td>\n",
825
+ " <td>253587</td>\n",
826
+ " <td>0.852</td>\n",
827
+ " <td>58ge6dfP91o9oXMzq3XkIS</td>\n",
828
+ " <td>0.000058</td>\n",
829
+ " <td>0</td>\n",
830
+ " <td>0.0733</td>\n",
831
+ " <td>-5.866</td>\n",
832
+ " <td>1</td>\n",
833
+ " <td>505</td>\n",
834
+ " <td>76</td>\n",
835
+ " <td>NaN</td>\n",
836
+ " <td>0.0543</td>\n",
837
+ " <td>140.267</td>\n",
838
+ " </tr>\n",
839
+ " </tbody>\n",
840
+ "</table>\n",
841
+ "</div>"
842
+ ],
843
+ "text/plain": [
844
+ " valence year acousticness artists danceability \\\n",
845
+ "17006 0.234 2007 0.00237 [Arctic Monkeys] 0.52 \n",
846
+ "\n",
847
+ " duration_ms energy id instrumentalness key \\\n",
848
+ "17006 253587 0.852 58ge6dfP91o9oXMzq3XkIS 0.000058 0 \n",
849
+ "\n",
850
+ " liveness loudness mode name popularity release_date speechiness \\\n",
851
+ "17006 0.0733 -5.866 1 505 76 NaN 0.0543 \n",
852
+ "\n",
853
+ " tempo \n",
854
+ "17006 140.267 "
855
+ ]
856
+ },
857
+ "execution_count": 34,
858
+ "metadata": {},
859
+ "output_type": "execute_result"
860
+ }
861
+ ],
862
+ "source": [
863
+ "input"
864
+ ]
865
+ },
866
+ {
867
+ "cell_type": "code",
868
+ "execution_count": 35,
869
+ "metadata": {
870
+ "colab": {
871
+ "base_uri": "https://localhost:8080/",
872
+ "height": 543
873
+ },
874
+ "id": "2z40QSBSXDEW",
875
+ "outputId": "3b3c4c68-009c-4094-c812-0f4fdfbc5ad1"
876
+ },
877
+ "outputs": [
878
+ {
879
+ "data": {
880
+ "text/html": [
881
+ "<div>\n",
882
+ "<style scoped>\n",
883
+ " .dataframe tbody tr th:only-of-type {\n",
884
+ " vertical-align: middle;\n",
885
+ " }\n",
886
+ "\n",
887
+ " .dataframe tbody tr th {\n",
888
+ " vertical-align: top;\n",
889
+ " }\n",
890
+ "\n",
891
+ " .dataframe thead th {\n",
892
+ " text-align: right;\n",
893
+ " }\n",
894
+ "</style>\n",
895
+ "<table border=\"1\" class=\"dataframe\">\n",
896
+ " <thead>\n",
897
+ " <tr style=\"text-align: right;\">\n",
898
+ " <th></th>\n",
899
+ " <th>valence</th>\n",
900
+ " <th>year</th>\n",
901
+ " <th>acousticness</th>\n",
902
+ " <th>artists</th>\n",
903
+ " <th>danceability</th>\n",
904
+ " <th>duration_ms</th>\n",
905
+ " <th>energy</th>\n",
906
+ " <th>id</th>\n",
907
+ " <th>instrumentalness</th>\n",
908
+ " <th>key</th>\n",
909
+ " <th>liveness</th>\n",
910
+ " <th>loudness</th>\n",
911
+ " <th>mode</th>\n",
912
+ " <th>name</th>\n",
913
+ " <th>popularity</th>\n",
914
+ " <th>speechiness</th>\n",
915
+ " <th>tempo</th>\n",
916
+ " </tr>\n",
917
+ " </thead>\n",
918
+ " <tbody>\n",
919
+ " <tr>\n",
920
+ " <th>18509</th>\n",
921
+ " <td>0.293</td>\n",
922
+ " <td>2014</td>\n",
923
+ " <td>0.000501</td>\n",
924
+ " <td>[Young the Giant]</td>\n",
925
+ " <td>0.466</td>\n",
926
+ " <td>244693</td>\n",
927
+ " <td>0.763</td>\n",
928
+ " <td>5Az8KU81g2aLBbJN67F2CI</td>\n",
929
+ " <td>0.00000</td>\n",
930
+ " <td>0</td>\n",
931
+ " <td>0.0973</td>\n",
932
+ " <td>-4.448</td>\n",
933
+ " <td>1</td>\n",
934
+ " <td>Mind Over Matter</td>\n",
935
+ " <td>69</td>\n",
936
+ " <td>0.0318</td>\n",
937
+ " <td>154.914</td>\n",
938
+ " </tr>\n",
939
+ " <tr>\n",
940
+ " <th>18039</th>\n",
941
+ " <td>0.370</td>\n",
942
+ " <td>2012</td>\n",
943
+ " <td>0.126000</td>\n",
944
+ " <td>[One Direction]</td>\n",
945
+ " <td>0.514</td>\n",
946
+ " <td>200400</td>\n",
947
+ " <td>0.727</td>\n",
948
+ " <td>6M31fPFCYB8Job3MCjjrDV</td>\n",
949
+ " <td>0.00000</td>\n",
950
+ " <td>0</td>\n",
951
+ " <td>0.0978</td>\n",
952
+ " <td>-6.131</td>\n",
953
+ " <td>1</td>\n",
954
+ " <td>They Don't Know About Us</td>\n",
955
+ " <td>79</td>\n",
956
+ " <td>0.0492</td>\n",
957
+ " <td>147.917</td>\n",
958
+ " </tr>\n",
959
+ " <tr>\n",
960
+ " <th>108337</th>\n",
961
+ " <td>0.285</td>\n",
962
+ " <td>2018</td>\n",
963
+ " <td>0.103000</td>\n",
964
+ " <td>[BTS]</td>\n",
965
+ " <td>0.541</td>\n",
966
+ " <td>275696</td>\n",
967
+ " <td>0.661</td>\n",
968
+ " <td>2tMBCYj22KxtmkAACIvvk4</td>\n",
969
+ " <td>0.00000</td>\n",
970
+ " <td>1</td>\n",
971
+ " <td>0.0779</td>\n",
972
+ " <td>-5.838</td>\n",
973
+ " <td>1</td>\n",
974
+ " <td>Magic Shop</td>\n",
975
+ " <td>72</td>\n",
976
+ " <td>0.0889</td>\n",
977
+ " <td>149.926</td>\n",
978
+ " </tr>\n",
979
+ " <tr>\n",
980
+ " <th>17405</th>\n",
981
+ " <td>0.205</td>\n",
982
+ " <td>2008</td>\n",
983
+ " <td>0.000354</td>\n",
984
+ " <td>[Nickelback]</td>\n",
985
+ " <td>0.536</td>\n",
986
+ " <td>252653</td>\n",
987
+ " <td>0.890</td>\n",
988
+ " <td>06T10fEzN8ZCcqzQZYA184</td>\n",
989
+ " <td>0.00165</td>\n",
990
+ " <td>0</td>\n",
991
+ " <td>0.1330</td>\n",
992
+ " <td>-5.222</td>\n",
993
+ " <td>1</td>\n",
994
+ " <td>Gotta Be Somebody</td>\n",
995
+ " <td>63</td>\n",
996
+ " <td>0.0601</td>\n",
997
+ " <td>115.998</td>\n",
998
+ " </tr>\n",
999
+ " <tr>\n",
1000
+ " <th>140275</th>\n",
1001
+ " <td>0.285</td>\n",
1002
+ " <td>2018</td>\n",
1003
+ " <td>0.103000</td>\n",
1004
+ " <td>[BTS]</td>\n",
1005
+ " <td>0.541</td>\n",
1006
+ " <td>275696</td>\n",
1007
+ " <td>0.661</td>\n",
1008
+ " <td>22ryCgQttpV7oCrn7llGRu</td>\n",
1009
+ " <td>0.00000</td>\n",
1010
+ " <td>1</td>\n",
1011
+ " <td>0.0779</td>\n",
1012
+ " <td>-5.838</td>\n",
1013
+ " <td>1</td>\n",
1014
+ " <td>Magic Shop</td>\n",
1015
+ " <td>68</td>\n",
1016
+ " <td>0.0889</td>\n",
1017
+ " <td>149.926</td>\n",
1018
+ " </tr>\n",
1019
+ " <tr>\n",
1020
+ " <th>17373</th>\n",
1021
+ " <td>0.261</td>\n",
1022
+ " <td>2008</td>\n",
1023
+ " <td>0.004010</td>\n",
1024
+ " <td>[Theory of a Deadman]</td>\n",
1025
+ " <td>0.415</td>\n",
1026
+ " <td>215400</td>\n",
1027
+ " <td>0.781</td>\n",
1028
+ " <td>5xyACR2lzIyzFepF4qlAas</td>\n",
1029
+ " <td>0.00000</td>\n",
1030
+ " <td>1</td>\n",
1031
+ " <td>0.1260</td>\n",
1032
+ " <td>-6.947</td>\n",
1033
+ " <td>1</td>\n",
1034
+ " <td>Not Meant to Be</td>\n",
1035
+ " <td>58</td>\n",
1036
+ " <td>0.0357</td>\n",
1037
+ " <td>142.966</td>\n",
1038
+ " </tr>\n",
1039
+ " <tr>\n",
1040
+ " <th>37677</th>\n",
1041
+ " <td>0.361</td>\n",
1042
+ " <td>2015</td>\n",
1043
+ " <td>0.092800</td>\n",
1044
+ " <td>[One Direction]</td>\n",
1045
+ " <td>0.546</td>\n",
1046
+ " <td>196933</td>\n",
1047
+ " <td>0.657</td>\n",
1048
+ " <td>1ZWLWVqeEMWMKTlteS0yLH</td>\n",
1049
+ " <td>0.00000</td>\n",
1050
+ " <td>1</td>\n",
1051
+ " <td>0.1190</td>\n",
1052
+ " <td>-4.787</td>\n",
1053
+ " <td>1</td>\n",
1054
+ " <td>Love You Goodbye</td>\n",
1055
+ " <td>70</td>\n",
1056
+ " <td>0.0332</td>\n",
1057
+ " <td>133.188</td>\n",
1058
+ " </tr>\n",
1059
+ " <tr>\n",
1060
+ " <th>17340</th>\n",
1061
+ " <td>0.306</td>\n",
1062
+ " <td>2008</td>\n",
1063
+ " <td>0.131000</td>\n",
1064
+ " <td>[Taylor Swift]</td>\n",
1065
+ " <td>0.617</td>\n",
1066
+ " <td>236267</td>\n",
1067
+ " <td>0.741</td>\n",
1068
+ " <td>1D4PL9B8gOg78jiHg3FvBb</td>\n",
1069
+ " <td>0.00000</td>\n",
1070
+ " <td>2</td>\n",
1071
+ " <td>0.0772</td>\n",
1072
+ " <td>-3.970</td>\n",
1073
+ " <td>1</td>\n",
1074
+ " <td>Love Story</td>\n",
1075
+ " <td>79</td>\n",
1076
+ " <td>0.0311</td>\n",
1077
+ " <td>118.984</td>\n",
1078
+ " </tr>\n",
1079
+ " <tr>\n",
1080
+ " <th>36264</th>\n",
1081
+ " <td>0.385</td>\n",
1082
+ " <td>2008</td>\n",
1083
+ " <td>0.033000</td>\n",
1084
+ " <td>[Nickelback]</td>\n",
1085
+ " <td>0.489</td>\n",
1086
+ " <td>262627</td>\n",
1087
+ " <td>0.748</td>\n",
1088
+ " <td>1xQZtbipNLyP0e0hihE5F5</td>\n",
1089
+ " <td>0.00000</td>\n",
1090
+ " <td>2</td>\n",
1091
+ " <td>0.1440</td>\n",
1092
+ " <td>-5.488</td>\n",
1093
+ " <td>1</td>\n",
1094
+ " <td>I'd Come for You</td>\n",
1095
+ " <td>59</td>\n",
1096
+ " <td>0.0311</td>\n",
1097
+ " <td>147.998</td>\n",
1098
+ " </tr>\n",
1099
+ " </tbody>\n",
1100
+ "</table>\n",
1101
+ "</div>"
1102
+ ],
1103
+ "text/plain": [
1104
+ " valence year acousticness artists danceability \\\n",
1105
+ "18509 0.293 2014 0.000501 [Young the Giant] 0.466 \n",
1106
+ "18039 0.370 2012 0.126000 [One Direction] 0.514 \n",
1107
+ "108337 0.285 2018 0.103000 [BTS] 0.541 \n",
1108
+ "17405 0.205 2008 0.000354 [Nickelback] 0.536 \n",
1109
+ "140275 0.285 2018 0.103000 [BTS] 0.541 \n",
1110
+ "17373 0.261 2008 0.004010 [Theory of a Deadman] 0.415 \n",
1111
+ "37677 0.361 2015 0.092800 [One Direction] 0.546 \n",
1112
+ "17340 0.306 2008 0.131000 [Taylor Swift] 0.617 \n",
1113
+ "36264 0.385 2008 0.033000 [Nickelback] 0.489 \n",
1114
+ "\n",
1115
+ " duration_ms energy id instrumentalness key \\\n",
1116
+ "18509 244693 0.763 5Az8KU81g2aLBbJN67F2CI 0.00000 0 \n",
1117
+ "18039 200400 0.727 6M31fPFCYB8Job3MCjjrDV 0.00000 0 \n",
1118
+ "108337 275696 0.661 2tMBCYj22KxtmkAACIvvk4 0.00000 1 \n",
1119
+ "17405 252653 0.890 06T10fEzN8ZCcqzQZYA184 0.00165 0 \n",
1120
+ "140275 275696 0.661 22ryCgQttpV7oCrn7llGRu 0.00000 1 \n",
1121
+ "17373 215400 0.781 5xyACR2lzIyzFepF4qlAas 0.00000 1 \n",
1122
+ "37677 196933 0.657 1ZWLWVqeEMWMKTlteS0yLH 0.00000 1 \n",
1123
+ "17340 236267 0.741 1D4PL9B8gOg78jiHg3FvBb 0.00000 2 \n",
1124
+ "36264 262627 0.748 1xQZtbipNLyP0e0hihE5F5 0.00000 2 \n",
1125
+ "\n",
1126
+ " liveness loudness mode name popularity \\\n",
1127
+ "18509 0.0973 -4.448 1 Mind Over Matter 69 \n",
1128
+ "18039 0.0978 -6.131 1 They Don't Know About Us 79 \n",
1129
+ "108337 0.0779 -5.838 1 Magic Shop 72 \n",
1130
+ "17405 0.1330 -5.222 1 Gotta Be Somebody 63 \n",
1131
+ "140275 0.0779 -5.838 1 Magic Shop 68 \n",
1132
+ "17373 0.1260 -6.947 1 Not Meant to Be 58 \n",
1133
+ "37677 0.1190 -4.787 1 Love You Goodbye 70 \n",
1134
+ "17340 0.0772 -3.970 1 Love Story 79 \n",
1135
+ "36264 0.1440 -5.488 1 I'd Come for You 59 \n",
1136
+ "\n",
1137
+ " speechiness tempo \n",
1138
+ "18509 0.0318 154.914 \n",
1139
+ "18039 0.0492 147.917 \n",
1140
+ "108337 0.0889 149.926 \n",
1141
+ "17405 0.0601 115.998 \n",
1142
+ "140275 0.0889 149.926 \n",
1143
+ "17373 0.0357 142.966 \n",
1144
+ "37677 0.0332 133.188 \n",
1145
+ "17340 0.0311 118.984 \n",
1146
+ "36264 0.0311 147.998 "
1147
+ ]
1148
+ },
1149
+ "execution_count": 35,
1150
+ "metadata": {},
1151
+ "output_type": "execute_result"
1152
+ }
1153
+ ],
1154
+ "source": [
1155
+ "output"
1156
+ ]
1157
+ }
1158
+ ],
1159
+ "metadata": {
1160
+ "colab": {
1161
+ "provenance": []
1162
+ },
1163
+ "kernelspec": {
1164
+ "display_name": "Python 3",
1165
+ "name": "python3"
1166
+ },
1167
+ "language_info": {
1168
+ "codemirror_mode": {
1169
+ "name": "ipython",
1170
+ "version": 3
1171
+ },
1172
+ "file_extension": ".py",
1173
+ "mimetype": "text/x-python",
1174
+ "name": "python",
1175
+ "nbconvert_exporter": "python",
1176
+ "pygments_lexer": "ipython3",
1177
+ "version": "3.12.3"
1178
+ }
1179
+ },
1180
+ "nbformat": 4,
1181
+ "nbformat_minor": 0
1182
+ }
__pycache__/model.cpython-312.pyc ADDED
Binary file (6.2 kB). View file
 
controller.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
def get_song_from_id(id):
    """Placeholder for looking up a song by its id (not implemented yet).

    Currently ignores ``id`` and returns ``None``.
    """
    return None
6
+
7
+
8
def get_song_from_title(title):
    """Placeholder for looking up a song by its title (not implemented yet).

    Currently ignores ``title`` and returns ``None``.
    """
    return None
12
+
13
def get_recomendation(songs,n_rec):
    """Placeholder for producing recommendations (not implemented yet).

    Currently ignores ``songs`` and ``n_rec`` and returns ``None``.
    """
    return None
data/data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c97c42349c2e97339ff908ab01163ec5a3264f02ec65e78cb961c85077ded7f8
3
+ size 29654587
data/data_by_artist.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/data_by_genres.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/data_by_year.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode,year,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,key
2
+ 1,1921,0.8868960000000005,0.4185973333333336,260537.16666666663,0.23181513333333334,0.34487805886666656,0.20571,-17.04866666666665,0.073662,101.53149333333329,0.37932666666666665,0.6533333333333333,2
3
+ 1,1922,0.9385915492957748,0.4820422535211267,165469.74647887325,0.23781535211267596,0.4341948697183099,0.2407197183098592,-19.275281690140844,0.1166549295774648,100.88452112676056,0.5355492957746479,0.14084507042253522,10
4
+ 1,1923,0.9572467913513516,0.5773405405405401,177942.36216216214,0.2624064864864865,0.37173272502702703,0.2274621621621621,-14.129210810810811,0.0939486486486487,114.0107297297297,0.6254924324324328,5.389189189189189,0
5
+ 1,1924,0.940199860169493,0.5498940677966102,191046.70762711862,0.3443466101694912,0.5817009136440677,0.2352190677966101,-14.231343220338989,0.09208940677966099,120.68957203389822,0.6637254237288139,0.6610169491525424,10
6
+ 1,1925,0.9626070503597138,0.5738633093525181,184986.92446043165,0.2785935251798561,0.4182973612230215,0.2376679856115108,-14.14641366906474,0.11191798561151084,115.5219208633093,0.6219287769784171,2.6043165467625897,5
7
+ 1,1926,0.660817216981134,0.5998802612481859,156881.65747460088,0.2114670907111756,0.3330931111175616,0.2323695936139332,-18.492538461538466,0.4837036284470243,109.64803265602328,0.4369104571843251,1.4223512336719883,9
8
+ 1,1927,0.9361794552845558,0.6482682926829262,184993.59837398372,0.2643213008130081,0.3913284986504065,0.16845024390243893,-14.422373983739831,0.11360959349593502,114.84652357723554,0.6597004878048782,0.8016260162601626,7
9
+ 1,1928,0.9386165035685952,0.5342878667724027,214827.90642347344,0.20794779540047573,0.4948354801348136,0.17528937351308488,-17.191982553528927,0.15991149881046796,106.77226169706593,0.4957126883425853,1.5257731958762886,1
10
+ 1,1929,0.6014265861344558,0.6476698529411761,168999.41281512607,0.2418007352941172,0.21520403106092464,0.2360002100840333,-16.530376050420152,0.4900007352941176,110.94835714285715,0.6365298319327733,0.3403361344537815,7
11
+ 1,1930,0.936714937370057,0.5181758835758836,195150.28534303536,0.3335239189189189,0.35220592816528057,0.22131086278586296,-12.869221413721428,0.11990966735966745,109.87119438669428,0.6162376299376306,0.9267151767151768,2
12
+ 1,1931,0.8330399585921336,0.5952217391304357,171553.42546583852,0.2344967732919257,0.22141964161490724,0.22742795031055896,-16.51609420289853,0.4536189440993791,109.02528157349907,0.5131170807453418,0.17080745341614906,0
13
+ 1,1932,0.9357705179282878,0.5577976095617526,195749.37250996014,0.3020677290836656,0.22635657916334675,0.23249621513944224,-13.364055776892426,0.1390073705179282,115.11980079681273,0.5881599601593626,2.151394422310757,7
14
+ 1,1933,0.899897909117222,0.57029030390738,196219.25759768448,0.2798994211287988,0.1839489396816208,0.2090719247467437,-13.06900868306802,0.09112315484804634,112.522,0.5994099855282197,6.898697539797395,7
15
+ 1,1934,0.8911487501730107,0.528705882352941,189356.1262975779,0.2621313148788928,0.276382266643599,0.21345259515570927,-14.756875432525954,0.10245692041522483,115.39084602076124,0.5588046712802772,1.2577854671280275,0
16
+ 1,1935,0.7783855599472987,0.5558691699604746,220124.24703557312,0.2463669762845848,0.2258732980895915,0.2292998682476943,-15.41474967061924,0.3539123847167326,110.66795652173931,0.5455777997364957,1.5013175230566536,7
17
+ 1,1936,0.7723120800000008,0.5580055454545453,220809.1863636364,0.3083886092727274,0.2571099957181816,0.2214382727272726,-14.612999090909069,0.27902936363636377,109.88875454545445,0.5640635454545455,5.080909090909091,10
18
+ 1,1937,0.8654357853881286,0.5421572298325723,223216.5585996956,0.31104829528158273,0.3270876216438359,0.2259677321156773,-13.11514307458143,0.08567884322678843,115.2632222222222,0.5857894977168944,3.328767123287671,7
19
+ 1,1938,0.9192803908646014,0.47997797716150103,249177.09951060358,0.2809812398042416,0.3784246349265906,0.237111092985318,-14.290582381729198,0.095957259380098,111.6253278955954,0.5149111092985317,2.0962479608482867,0
20
+ 1,1939,0.9087381000000004,0.5126828,221602.065,0.2826715000000002,0.2776824694100001,0.2391015999999999,-13.900570000000005,0.1284626999999999,113.174592,0.5599248999999992,4.36,0
21
+ 1,1940,0.8476440499999951,0.52189235,182227.9445,0.3108928949999999,0.31684869986499986,0.26433510000000043,-13.684048000000017,0.2429576500000001,108.44933449999992,0.6167094700000009,0.93,7
22
+ 1,1941,0.8957376562500025,0.4804813541666676,201904.575,0.2656431979166666,0.4449516393124999,0.20183989583333312,-15.755536458333326,0.09136520833333356,107.91512291666668,0.4794560416666661,1.3572916666666666,7
23
+ 1,1942,0.8529343703923857,0.4646338882282997,222361.16825208085,0.2560794708680144,0.39288204787158104,0.2128784185493456,-15.02903210463735,0.08367835909631392,106.00839774078482,0.4774088585017848,1.1266349583828774,7
24
+ 1,1943,0.9027523185915528,0.4551456338028168,240119.9098591549,0.27999004225352114,0.4098969234647892,0.23921070422535246,-13.60212535211268,0.10571957746478874,106.19679154929581,0.4954545070422531,1.1816901408450704,5
25
+ 1,1944,0.9076531914893636,0.5001744680851068,245555.58643617018,0.2534414494680852,0.4492919654122334,0.2387719414893616,-14.58205585106382,0.17328324468085093,105.96392952127657,0.5406954787234041,3.1928191489361697,10
26
+ 1,1945,0.7096568630000005,0.5191431500000011,196161.557,0.2260444155,0.2758942570149996,0.2030099500000001,-16.9814715,0.3050972999999997,108.32407049999986,0.4913613500000002,2.1265,0
27
+ 1,1946,0.9199445823665852,0.4144450116009276,267677.82308584685,0.22217230858468667,0.4861264096461711,0.2108183294663576,-16.921774361948952,0.08800591647331797,103.1645551044084,0.3982538283062647,1.65661252900232,7
28
+ 1,1947,0.9139650224242392,0.4713208484848491,233137.38424242425,0.2635512000000002,0.3475934179090908,0.22382939393939366,-14.377607878787892,0.10056381818181824,108.991026060606,0.5082125454545455,1.6157575757575755,7
29
+ 1,1948,0.9221549999999956,0.4633694736842109,199194.87421052632,0.242464841052632,0.3554848223842104,0.21969357894736785,-15.505407368421027,0.1643593157894739,108.05836105263157,0.4621423684210526,1.5273684210526317,0
30
+ 1,1949,0.9075328949999956,0.4421993999999996,223199.3275,0.24611461499999965,0.2977615361899994,0.2088585500000004,-15.525844999999983,0.10299210000000003,108.87313549999992,0.4304195000000001,3.9389999999999996,0
31
+ 1,1950,0.8539413005999965,0.5042531000000008,215073.1255,0.3140711950000003,0.245000820015,0.2169584000000004,-13.863834499999962,0.1534527499999999,111.74972499999987,0.5516497000000001,3.2065,7
32
+ 1,1951,0.9022971956499974,0.4624800999999999,213154.641,0.25466565500000005,0.3084378555850002,0.21777739999999995,-15.812066000000033,0.10850664999999993,109.62587199999994,0.44134755,3.1495,0
33
+ 1,1952,0.8743009244999965,0.4570322000000001,229434.72699999998,0.25362601000000023,0.2884126469300002,0.2329722999999998,-15.95886600000001,0.1373617999999998,108.23228049999986,0.44313496204999947,3.338,0
34
+ 1,1953,0.8909223502564074,0.4374260512820513,217455.5682051282,0.2661968307692306,0.3183223943230766,0.22173405128205104,-15.499134871794872,0.09008825641025647,110.02556666666672,0.42914205128205135,3.5046153846153847,0
35
+ 1,1954,0.8650197272999992,0.4656388999999994,224029.36800000002,0.264347385,0.2974400082799999,0.2178411499999996,-15.408004500000024,0.10481329999999993,108.71309249999992,0.4523410500000002,7.987,0
36
+ 1,1955,0.8405510069999986,0.4881312,222137.44600000003,0.2950963400000001,0.21660442169000027,0.2214353500000001,-13.842262000000035,0.0895582,111.02127950000023,0.4724154999999997,10.1445,5
37
+ 1,1956,0.8230569899999975,0.4878145000000007,211846.9215,0.30476112000000016,0.23876341778000015,0.18225715000000026,-14.175195500000006,0.062312349999999926,112.20603900000013,0.5084230500000003,15.785,0
38
+ 1,1957,0.7928023589743608,0.5034812307692305,241604.8241025641,0.2952791333333324,0.1945206161692309,0.1827485641025636,-14.124848717948735,0.06435312820512826,112.75229487179486,0.5110101538461542,20.08205128205128,5
39
+ 1,1958,0.7796118650000005,0.4800213999999995,214146.9005,0.3087957550000006,0.18124111005999985,0.2058408500000003,-14.165171000000019,0.05699929999999995,113.32612349999994,0.4976614999999994,19.2575,0
40
+ 1,1959,0.7793927897435907,0.4821143076923083,216292.8369230769,0.3109944051282049,0.18814819406153835,0.1918429230769232,-14.460136923076945,0.06787430769230775,112.11770153846165,0.4784156410256422,20.773333333333333,0
41
+ 1,1960,0.7671814843243259,0.4860292432432435,210209.68378378375,0.3411423783783781,0.1765021594216216,0.20786372972972986,-13.814102702702725,0.0657835135135136,112.56167891891873,0.5239316756756759,19.783783783783786,0
42
+ 1,1961,0.7519038684210535,0.4891655789473678,232186.61421052628,0.3395318673684203,0.26462325314210544,0.20891257894736812,-14.171330526315785,0.0594759473684209,111.80895789473686,0.5128512105263155,19.59842105263158,0
43
+ 1,1962,0.720607806842106,0.4931643684210523,209294.41315789477,0.3710257,0.17147155094210545,0.20189573684210546,-13.177364210526294,0.057739842105263124,114.84100473684195,0.5477920526315792,22.127368421052637,0
44
+ 1,1963,0.7227048300000004,0.48162135,200510.5485,0.3653538849999999,0.18543231468,0.20374530000000027,-13.670647000000017,0.05203614999999992,114.32458749999985,0.5415274500000008,25.5355,0
45
+ 1,1964,0.6941005307692291,0.5041769743589742,195536.4117948718,0.394438825641026,0.1595978017794872,0.2172323589743596,-13.048075897435869,0.06659446153846146,115.0814892307692,0.55726042051282,26.32102564102564,0
46
+ 1,1965,0.6099113295384619,0.5034505641025638,196342.08615384615,0.4490272461538461,0.1183659460923078,0.21383912820512846,-11.888213333333338,0.05755646153846147,116.83544666666687,0.5809506666666668,27.52,0
47
+ 1,1966,0.5248094599699997,0.5070204999999981,216095.113,0.482208465,0.11512595556499995,0.21809309999999996,-11.356919500000007,0.055964050000000085,116.33542399999996,0.5801445499999996,28.2815,2
48
+ 1,1967,0.5296103243521767,0.4926885942971483,198987.5182591296,0.44682383191595854,0.1189468546723362,0.19091765882941508,-11.708829414707358,0.04909839919959968,116.6800225112558,0.5507916958479243,31.190595297648827,0
49
+ 1,1968,0.4937702403500001,0.5010080999999997,214980.984,0.4569696500000004,0.11277694928500002,0.2173509500000005,-12.043861999999995,0.0589884999999999,115.83259149999995,0.5659936500000002,31.2595,0
50
+ 1,1969,0.4488463039999994,0.4880977999999999,242545.9405,0.4876251299999994,0.15945270816500012,0.20696589999999992,-12.062166000000007,0.05497499999999995,117.02432850000014,0.5547452849999999,32.844,2
51
+ 1,1970,0.4600571139500007,0.5063075000000004,242852.1515,0.495633050000001,0.12756700308499988,0.21226880000000026,-11.772557999999995,0.051681499999999936,117.1116105,0.5720753999999992,34.3945,2
52
+ 1,1971,0.4583362981000009,0.5042738499999999,260511.7935,0.4963612550000003,0.12501788923,0.21491455000000012,-12.079936000000007,0.05942240000000008,118.27224749999992,0.5749132500000002,34.4475,2
53
+ 1,1972,0.4600428051999994,0.5209994499999997,245914.2285,0.5045261799999999,0.11488033375999974,0.20695495000000014,-11.773563499999995,0.05487124999999998,119.412852,0.5956881500000009,34.485,2
54
+ 1,1973,0.4165241889999992,0.5154494500000005,257002.7905,0.5361382499999989,0.09968022322999996,0.20137474999999974,-11.260197000000012,0.057825750000000085,120.14631649999998,0.5921162000000004,36.428000000000004,0
55
+ 1,1974,0.4472074226000001,0.51533975,252555.79100000003,0.5147236550000011,0.11636098351000007,0.20934634999999968,-11.816489000000018,0.0573193499999999,119.41645299999985,0.5812744000000009,32.2465,2
56
+ 1,1975,0.4337738871950005,0.5209980000000007,254969.3715,0.5232085050000003,0.11132203374499992,0.21544135000000025,-11.48593899999998,0.06109845,118.68401550000021,0.5694151500000002,34.812,2
57
+ 1,1976,0.36147063210999936,0.5297624999999999,267676.967,0.5538432500000001,0.12162904398000007,0.24903219999999995,-11.114236999999996,0.06274994999999996,120.52205800000002,0.5807332499999998,35.835,2
58
+ 1,1977,0.3393025333750004,0.5333237000000017,257121.848,0.560287959000001,0.12772103194499998,0.2232140500000004,-11.077354499999998,0.060854549999999966,122.18543050000022,0.58524655,35.8465,9
59
+ 1,1978,0.32896857344999936,0.5407746000000005,249051.887,0.5614288650000007,0.10038121450999994,0.21807094999999949,-10.891867999999995,0.06275740000000009,120.37794350000006,0.5895352500000014,35.891,2
60
+ 1,1979,0.29593224063,0.5620453000000001,252860.2265,0.5910127149999996,0.11575137201500015,0.21344310000000027,-10.977513500000004,0.06905859999999991,122.45681650000013,0.6099818000000005,36.172,0
61
+ 1,1980,0.28495539763589706,0.5561523589743594,252835.53333333327,0.5977773948717964,0.1287510612717948,0.20375410256410267,-10.700942051282043,0.05924861538461545,122.98500102564111,0.5980580512820513,36.206666666666656,0
62
+ 1,1981,0.3012042267400006,0.5421846999999999,250904.205,0.5993827249999998,0.14683794661999985,0.2250998999999999,-10.924404499999977,0.057915999999999926,123.16939500000014,0.5829518500000006,35.7525,9
63
+ 1,1982,0.2895800382050001,0.5642594000000005,248357.306,0.5909193894999996,0.10001638300000004,0.20100675,-10.9331225,0.06585305000000008,120.91860749999992,0.5864384500000001,36.247,2
64
+ 1,1983,0.2947778916300001,0.5456292999999998,251123.6985,0.6014252350000012,0.14541052329000045,0.1989224500000002,-11.200171000000024,0.06688600000000006,121.070629,0.5721198999999989,36.268,9
65
+ 1,1984,0.2877095803450008,0.5303089499999987,251845.2565,0.5910916750000013,0.14517303973500045,0.19770620000000003,-11.523238499999987,0.059264800000000166,121.37666200000015,0.5516387100000003,37.7745,9
66
+ 1,1985,0.2965709032099995,0.5556424000000012,252276.258,0.6015154800000013,0.11373855807499987,0.19920644999999984,-11.360688499999991,0.05846480000000006,121.9793035000001,0.5655964500000001,37.013000000000005,9
67
+ 1,1986,0.28508065642499997,0.5409334000000001,254708.4055,0.6149545999999998,0.10994085436499992,0.21187945000000014,-11.103995000000019,0.06481609999999999,120.49930200000016,0.5509485500000019,37.5775,9
68
+ 1,1987,0.3116640663948722,0.5410193333333342,247543.3230769231,0.5798395538461533,0.10197451746666662,0.20201810256410305,-11.623467179487175,0.05778964102564103,120.84570717948708,0.5414511794871785,39.685128205128215,7
69
+ 1,1988,0.32200994651282105,0.5404594358974372,257442.5897435897,0.5856792461538458,0.11719038022051302,0.20770753846153808,-11.519821025641036,0.06609989743589745,120.02442564102579,0.5464853897435901,39.60051282051282,7
70
+ 1,1989,0.3133512518150009,0.5472274499999988,254203.5935,0.584046375000001,0.11318254461499995,0.19908020000000007,-11.38413000000001,0.06393924999999995,120.5790015000003,0.5483329000000007,39.1825,7
71
+ 1,1990,0.3328699505850001,0.5352986500000001,256451.4035,0.5715913950000007,0.12582626809499972,0.19096090000000004,-11.327479499999981,0.06434529999999995,120.06273350000023,0.5265268499999999,40.7855,7
72
+ 1,1991,0.33276529163076995,0.5558243589743591,245912.00615384613,0.5594023641025635,0.11509233979999993,0.18862892307692355,-10.955768717948725,0.07110158974358982,119.36257435897407,0.5486552307692295,41.65589743589744,7
73
+ 1,1992,0.3208087638250009,0.5550648499999989,246506.46,0.5828642050000001,0.1006900830649998,0.20024550000000071,-10.575404499999976,0.07306230000000008,122.58621850000021,0.5590983000000014,42.994,7
74
+ 1,1993,0.3090685537400004,0.5698781500000001,251667.9675,0.5625599100000002,0.11529396538000007,0.1949322500000003,-10.882701000000026,0.07635054999999992,120.36493599999993,0.5555684500000001,42.9975,7
75
+ 1,1994,0.3062343630899999,0.5528299500000016,250992.254,0.578772685000001,0.11188575994999997,0.1921651000000001,-10.282273000000004,0.07604935000000011,118.44516999999986,0.5266491500000006,45.0615,7
76
+ 1,1995,0.30251180612000045,0.5590457500000001,246624.0915,0.5785795100000009,0.12326879606999998,0.2013737000000004,-10.119450000000008,0.08375444999999987,119.28197799999987,0.5302466999999994,44.801,0
77
+ 1,1996,0.30102687980000065,0.5878306000000001,248180.983,0.5860684750000011,0.09505281203500013,0.1986863000000002,-9.44381849999999,0.08607765000000005,117.70835699999998,0.5608732500000001,44.903999999999996,7
78
+ 1,1997,0.3067156934549995,0.5768138499999994,245749.2815,0.5986987151000001,0.10469017861999998,0.1984788000000003,-9.200875500000018,0.08850605000000017,118.18231600000021,0.5485525000000002,45.582,7
79
+ 1,1998,0.29148946157000016,0.5862116499999998,248542.6075,0.6051975050000015,0.1113257503499999,0.1937308999999996,-8.971167999999997,0.09173105000000006,118.54010800000003,0.5377209,45.8945,7
80
+ 1,1999,0.2715901110661201,0.5821579190158892,245177.7452588416,0.6375226022552535,0.09406814106611992,0.20630804715530507,-8.266409533572531,0.09440717580727807,119.50949051768335,0.5564528446950284,47.25832906201947,0
81
+ 1,2000,0.28932270051635994,0.590918047034764,242724.6426380368,0.6254128323108387,0.10116776879345596,0.1976860429447853,-8.247765848670758,0.08920541922290394,118.9993231083843,0.5594754601226991,46.6840490797546,7
82
+ 1,2001,0.2868424748428934,0.5833178553615969,240307.79600997505,0.6269855221945144,0.10721401899251867,0.18702563591022486,-8.305095261845384,0.08918229426433916,117.76539900249378,0.541479107231919,48.75012468827929,7
83
+ 1,2002,0.28262428988500105,0.5761602999999997,239503.283,0.6412697960000012,0.08804835031499995,0.1939111999999998,-7.686639500000013,0.08430759999999995,119.23973849999993,0.5423971500000008,48.6555,7
84
+ 1,2003,0.25647051817297833,0.5757633060388944,244670.5752302968,0.6601652610030712,0.08304927466734914,0.19697630501535301,-7.485545035823955,0.09392574206755384,120.9146218014328,0.5305042476970324,48.62640736949847,7
85
+ 1,2004,0.2805589466225845,0.5676803662258397,237378.70803662261,0.6488679450661226,0.07793403490844357,0.2021994404883014,-7.601655137334684,0.09423880976602242,121.29034587995956,0.5244885554425227,49.27314343845372,7
86
+ 1,2005,0.2557635068666665,0.5722805641025652,237229.5882051282,0.653208511282051,0.09019436118461543,0.19008162564102585,-7.46615897435897,0.09333369230769203,121.61796666666649,0.5325309230769236,50.95333333333333,0
87
+ 1,2006,0.2799863520256408,0.5682301538461539,234042.91435897435,0.6503262820512825,0.07770147187692314,0.1882892820512821,-7.265500512820514,0.08584676923076921,121.79861538461536,0.5200283076923078,51.31384615384616,7
88
+ 1,2007,0.2540809575538463,0.5634143589743592,241049.96256410255,0.6683047743589737,0.07295726805128197,0.19612656410256424,-7.044535897435892,0.08434733333333336,124.0875164102562,0.5167938461538473,51.07589743589744,7
89
+ 1,2008,0.2491917627212275,0.5791928388746803,240107.31560102306,0.6714608207672623,0.0636620903171356,0.19843140664961625,-6.8438040920716166,0.07735636828644507,123.50993350383641,0.5275418925831203,50.63017902813299,0
90
+ 1,2009,0.26192861909693904,0.5641903571428577,238140.0132653061,0.6707487551020408,0.07587207368367352,0.2052522959183674,-7.0460147959183645,0.08545780612244899,123.4638076530612,0.5071696617346935,51.440816326530616,0
91
+ 1,2010,0.2426866435515872,0.5724883432539687,242811.8045634921,0.6817778026289684,0.08298056821924611,0.19970064484126995,-6.9099042658730125,0.08103149801587313,123.5702152777779,0.5208951587301589,52.73015873015873,0
92
+ 1,2011,0.27318266251859197,0.5528669806643526,236998.787307883,0.6483008934060499,0.10377233216658402,0.20330882498760566,-7.574986117997006,0.08747927615270201,121.48399702528488,0.4724536787307882,53.30738720872582,2
93
+ 1,2012,0.24995304438046245,0.5708818508997433,245807.45758354763,0.6565714601542408,0.08520552067866319,0.1897330077120824,-7.260549614395888,0.08174246786632405,121.78173624678669,0.4627090128534706,52.65501285347044,7
94
+ 1,2013,0.2574880859564779,0.5711480263157896,242267.66143724695,0.6455968914473688,0.09836505391700387,0.19963081983805714,-7.472039473684207,0.09384888663967614,120.80682945344118,0.4547411943319834,54.047064777327925,1
95
+ 1,2014,0.24931264588029906,0.5899476807980057,233728.31471321694,0.6487954437905223,0.07656955483790515,0.1918215960099752,-7.06743990024938,0.08406054862842896,122.30526284289296,0.4630487780548628,55.543142144638395,0
96
+ 1,2015,0.2539522577102332,0.5937740628166152,230029.0466058764,0.6270642715298895,0.1067868780547114,0.18885643363728474,-7.625639311043569,0.09677922998986847,120.11541134751768,0.4320983687943254,56.70060790273557,7
97
+ 1,2016,0.28417102995548144,0.6002023928770179,221396.51029493602,0.5928552316082347,0.09398438711741793,0.18116989426822505,-8.061056204785759,0.10431329994435183,118.65262993878693,0.4315320589872012,59.6471897607123,0
98
+ 1,2017,0.2860990665261043,0.6122170180722886,211115.6967871486,0.5904210208835337,0.09709060906626496,0.1917126004016064,-8.31262951807228,0.1105364959839356,117.20273995983936,0.4164763112449793,63.26355421686747,1
99
+ 1,2018,0.2676329907750836,0.6635004755111744,206001.0071326676,0.6024346220161672,0.05421712166904419,0.17632553495007114,-7.168785068949124,0.12717555872563022,121.9223076557296,0.4479212743699474,63.29624346172135,1
100
+ 1,2019,0.2782985863365824,0.6448141097998967,201024.78809645973,0.5932240360184717,0.07764024697280643,0.17261641867624464,-7.722191893278596,0.12104335556695732,120.23564443304272,0.4588176295536167,65.25654181631606,1
101
+ 1,2020,0.2199308880935964,0.6929043349753701,193728.39753694585,0.6312316354679793,0.016375524305418724,0.1785354187192117,-6.595066995073878,0.14138369458128058,124.28312857142865,0.5010478078817729,64.30197044334976,1
data/data_clean.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c810e41a438408daa46ce1e5b84465306bda6c85baf48bd34879c44922ec5e
3
+ size 27385463
data/data_model.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd3a793966668338184f9dfd0eddb71fe73a8c1e8328da26a24da8e46d94d72c
3
+ size 37670908
data/data_w_genres.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cleaning.ipynb ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "id": "K6Ek26jtEbzM"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "import pandas as pd"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {
18
+ "id": "jSok_FhgFQPa"
19
+ },
20
+ "outputs": [],
21
+ "source": [
22
+ "df = pd.read_csv('data/data.csv')"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 3,
28
+ "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "data": {
32
+ "text/plain": [
33
+ "Index(['valence', 'year', 'acousticness', 'artists', 'danceability',\n",
34
+ " 'duration_ms', 'energy', 'explicit', 'id', 'instrumentalness', 'key',\n",
35
+ " 'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',\n",
36
+ " 'speechiness', 'tempo'],\n",
37
+ " dtype='object')"
38
+ ]
39
+ },
40
+ "execution_count": 3,
41
+ "metadata": {},
42
+ "output_type": "execute_result"
43
+ }
44
+ ],
45
+ "source": [
46
+ "df.columns"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 4,
52
+ "metadata": {},
53
+ "outputs": [],
54
+ "source": [
55
+ "selected_columns = ['valence', 'year', 'acousticness', 'artists', 'danceability',\n",
56
+ " 'duration_ms', 'energy','id', 'instrumentalness', 'key',\n",
57
+ " 'liveness', 'loudness', 'mode', 'name', 'popularity',\n",
58
+ " 'speechiness', 'tempo']\n"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "df.to_csv('data/data_clean.csv', columns=selected_columns, index=False)"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 5,
73
+ "metadata": {},
74
+ "outputs": [
75
+ {
76
+ "data": {
77
+ "text/html": [
78
+ "<div>\n",
79
+ "<style scoped>\n",
80
+ " .dataframe tbody tr th:only-of-type {\n",
81
+ " vertical-align: middle;\n",
82
+ " }\n",
83
+ "\n",
84
+ " .dataframe tbody tr th {\n",
85
+ " vertical-align: top;\n",
86
+ " }\n",
87
+ "\n",
88
+ " .dataframe thead th {\n",
89
+ " text-align: right;\n",
90
+ " }\n",
91
+ "</style>\n",
92
+ "<table border=\"1\" class=\"dataframe\">\n",
93
+ " <thead>\n",
94
+ " <tr style=\"text-align: right;\">\n",
95
+ " <th></th>\n",
96
+ " <th>valence</th>\n",
97
+ " <th>year</th>\n",
98
+ " <th>acousticness</th>\n",
99
+ " <th>artists</th>\n",
100
+ " <th>danceability</th>\n",
101
+ " <th>duration_ms</th>\n",
102
+ " <th>energy</th>\n",
103
+ " <th>explicit</th>\n",
104
+ " <th>id</th>\n",
105
+ " <th>instrumentalness</th>\n",
106
+ " <th>key</th>\n",
107
+ " <th>liveness</th>\n",
108
+ " <th>loudness</th>\n",
109
+ " <th>mode</th>\n",
110
+ " <th>name</th>\n",
111
+ " <th>popularity</th>\n",
112
+ " <th>release_date</th>\n",
113
+ " <th>speechiness</th>\n",
114
+ " <th>tempo</th>\n",
115
+ " </tr>\n",
116
+ " </thead>\n",
117
+ " <tbody>\n",
118
+ " <tr>\n",
119
+ " <th>0</th>\n",
120
+ " <td>0.0594</td>\n",
121
+ " <td>1921</td>\n",
122
+ " <td>0.982</td>\n",
123
+ " <td>['Sergei Rachmaninoff', 'James Levine', 'Berli...</td>\n",
124
+ " <td>0.279</td>\n",
125
+ " <td>831667</td>\n",
126
+ " <td>0.211</td>\n",
127
+ " <td>0</td>\n",
128
+ " <td>4BJqT0PrAfrxzMOxytFOIz</td>\n",
129
+ " <td>0.878000</td>\n",
130
+ " <td>10</td>\n",
131
+ " <td>0.665</td>\n",
132
+ " <td>-20.096</td>\n",
133
+ " <td>1</td>\n",
134
+ " <td>Piano Concerto No. 3 in D Minor, Op. 30: III. ...</td>\n",
135
+ " <td>4</td>\n",
136
+ " <td>1921</td>\n",
137
+ " <td>0.0366</td>\n",
138
+ " <td>80.954</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>1</th>\n",
142
+ " <td>0.9630</td>\n",
143
+ " <td>1921</td>\n",
144
+ " <td>0.732</td>\n",
145
+ " <td>['Dennis Day']</td>\n",
146
+ " <td>0.819</td>\n",
147
+ " <td>180533</td>\n",
148
+ " <td>0.341</td>\n",
149
+ " <td>0</td>\n",
150
+ " <td>7xPhfUan2yNtyFG0cUWkt8</td>\n",
151
+ " <td>0.000000</td>\n",
152
+ " <td>7</td>\n",
153
+ " <td>0.160</td>\n",
154
+ " <td>-12.441</td>\n",
155
+ " <td>1</td>\n",
156
+ " <td>Clancy Lowered the Boom</td>\n",
157
+ " <td>5</td>\n",
158
+ " <td>1921</td>\n",
159
+ " <td>0.4150</td>\n",
160
+ " <td>60.936</td>\n",
161
+ " </tr>\n",
162
+ " <tr>\n",
163
+ " <th>2</th>\n",
164
+ " <td>0.0394</td>\n",
165
+ " <td>1921</td>\n",
166
+ " <td>0.961</td>\n",
167
+ " <td>['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...</td>\n",
168
+ " <td>0.328</td>\n",
169
+ " <td>500062</td>\n",
170
+ " <td>0.166</td>\n",
171
+ " <td>0</td>\n",
172
+ " <td>1o6I8BglA6ylDMrIELygv1</td>\n",
173
+ " <td>0.913000</td>\n",
174
+ " <td>3</td>\n",
175
+ " <td>0.101</td>\n",
176
+ " <td>-14.850</td>\n",
177
+ " <td>1</td>\n",
178
+ " <td>Gati Bali</td>\n",
179
+ " <td>5</td>\n",
180
+ " <td>1921</td>\n",
181
+ " <td>0.0339</td>\n",
182
+ " <td>110.339</td>\n",
183
+ " </tr>\n",
184
+ " <tr>\n",
185
+ " <th>3</th>\n",
186
+ " <td>0.1650</td>\n",
187
+ " <td>1921</td>\n",
188
+ " <td>0.967</td>\n",
189
+ " <td>['Frank Parker']</td>\n",
190
+ " <td>0.275</td>\n",
191
+ " <td>210000</td>\n",
192
+ " <td>0.309</td>\n",
193
+ " <td>0</td>\n",
194
+ " <td>3ftBPsC5vPBKxYSee08FDH</td>\n",
195
+ " <td>0.000028</td>\n",
196
+ " <td>5</td>\n",
197
+ " <td>0.381</td>\n",
198
+ " <td>-9.316</td>\n",
199
+ " <td>1</td>\n",
200
+ " <td>Danny Boy</td>\n",
201
+ " <td>3</td>\n",
202
+ " <td>1921</td>\n",
203
+ " <td>0.0354</td>\n",
204
+ " <td>100.109</td>\n",
205
+ " </tr>\n",
206
+ " <tr>\n",
207
+ " <th>4</th>\n",
208
+ " <td>0.2530</td>\n",
209
+ " <td>1921</td>\n",
210
+ " <td>0.957</td>\n",
211
+ " <td>['Phil Regan']</td>\n",
212
+ " <td>0.418</td>\n",
213
+ " <td>166693</td>\n",
214
+ " <td>0.193</td>\n",
215
+ " <td>0</td>\n",
216
+ " <td>4d6HGyGT8e121BsdKmw9v6</td>\n",
217
+ " <td>0.000002</td>\n",
218
+ " <td>3</td>\n",
219
+ " <td>0.229</td>\n",
220
+ " <td>-10.096</td>\n",
221
+ " <td>1</td>\n",
222
+ " <td>When Irish Eyes Are Smiling</td>\n",
223
+ " <td>2</td>\n",
224
+ " <td>1921</td>\n",
225
+ " <td>0.0380</td>\n",
226
+ " <td>101.665</td>\n",
227
+ " </tr>\n",
228
+ " </tbody>\n",
229
+ "</table>\n",
230
+ "</div>"
231
+ ],
232
+ "text/plain": [
233
+ " valence year acousticness \\\n",
234
+ "0 0.0594 1921 0.982 \n",
235
+ "1 0.9630 1921 0.732 \n",
236
+ "2 0.0394 1921 0.961 \n",
237
+ "3 0.1650 1921 0.967 \n",
238
+ "4 0.2530 1921 0.957 \n",
239
+ "\n",
240
+ " artists danceability \\\n",
241
+ "0 ['Sergei Rachmaninoff', 'James Levine', 'Berli... 0.279 \n",
242
+ "1 ['Dennis Day'] 0.819 \n",
243
+ "2 ['KHP Kridhamardawa Karaton Ngayogyakarta Hadi... 0.328 \n",
244
+ "3 ['Frank Parker'] 0.275 \n",
245
+ "4 ['Phil Regan'] 0.418 \n",
246
+ "\n",
247
+ " duration_ms energy explicit id instrumentalness \\\n",
248
+ "0 831667 0.211 0 4BJqT0PrAfrxzMOxytFOIz 0.878000 \n",
249
+ "1 180533 0.341 0 7xPhfUan2yNtyFG0cUWkt8 0.000000 \n",
250
+ "2 500062 0.166 0 1o6I8BglA6ylDMrIELygv1 0.913000 \n",
251
+ "3 210000 0.309 0 3ftBPsC5vPBKxYSee08FDH 0.000028 \n",
252
+ "4 166693 0.193 0 4d6HGyGT8e121BsdKmw9v6 0.000002 \n",
253
+ "\n",
254
+ " key liveness loudness mode \\\n",
255
+ "0 10 0.665 -20.096 1 \n",
256
+ "1 7 0.160 -12.441 1 \n",
257
+ "2 3 0.101 -14.850 1 \n",
258
+ "3 5 0.381 -9.316 1 \n",
259
+ "4 3 0.229 -10.096 1 \n",
260
+ "\n",
261
+ " name popularity release_date \\\n",
262
+ "0 Piano Concerto No. 3 in D Minor, Op. 30: III. ... 4 1921 \n",
263
+ "1 Clancy Lowered the Boom 5 1921 \n",
264
+ "2 Gati Bali 5 1921 \n",
265
+ "3 Danny Boy 3 1921 \n",
266
+ "4 When Irish Eyes Are Smiling 2 1921 \n",
267
+ "\n",
268
+ " speechiness tempo \n",
269
+ "0 0.0366 80.954 \n",
270
+ "1 0.4150 60.936 \n",
271
+ "2 0.0339 110.339 \n",
272
+ "3 0.0354 100.109 \n",
273
+ "4 0.0380 101.665 "
274
+ ]
275
+ },
276
+ "execution_count": 5,
277
+ "metadata": {},
278
+ "output_type": "execute_result"
279
+ }
280
+ ],
281
+ "source": [
282
+ "df.head()"
283
+ ]
284
+ }
285
+ ],
286
+ "metadata": {
287
+ "colab": {
288
+ "provenance": []
289
+ },
290
+ "kernelspec": {
291
+ "display_name": "Python 3",
292
+ "name": "python3"
293
+ },
294
+ "language_info": {
295
+ "codemirror_mode": {
296
+ "name": "ipython",
297
+ "version": 3
298
+ },
299
+ "file_extension": ".py",
300
+ "mimetype": "text/x-python",
301
+ "name": "python",
302
+ "nbconvert_exporter": "python",
303
+ "pygments_lexer": "ipython3",
304
+ "version": "3.12.4"
305
+ }
306
+ },
307
+ "nbformat": 4,
308
+ "nbformat_minor": 0
309
+ }
log.txt ADDED
File without changes
main.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import streamlit as st
3
+
4
+ # PAGE TITLE
5
+ st.set_page_config(page_title='Music Recommender App', page_icon='🎵')
6
+
7
+ # TITLE
8
+ st.title('Music Recommender App')
9
+
10
+ with st.sidebar:
11
+ st.header('Welcome to the Music Recommender System!')
12
+ st.write('This is a simple music recommender system that uses the Spotify API to recommend songs based on the song you input. The system uses the Spotify API to get the audio features of the song you input and then recommends songs that are similar to it.')
13
+ # separator
14
+ st.write('---')
15
+
16
+ with st.expander('⚙️ Setting'):
17
+ num_rec = st.slider('Number of Recommendations', min_value=1, max_value=20, value=5)
18
+
19
+
20
+ with st.form(key='my_form'):
21
+ # INPUT
22
+ song_title = st.text_input("Song title", "")
23
+ generate_button = st.form_submit_button("Generate")
24
+
25
+
26
+ if generate_button:
27
+ with st.spinner('Generating Recommendations...'):
28
+ time.sleep(2)
29
+ st.write("Recommendations for", song_title)
30
+
31
+
model.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+ import pandas as pd
3
+ import numpy as np
4
+ import ast
5
+ from scipy.spatial.distance import cdist
6
+
7
+
8
+
9
+ import yaml
10
+ import spotipy
11
+ from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials
12
+
13
# Column order of the song records built by get_song_spotify() and df_song_data().
# NOTE(review): get_song_spotify() does not populate 'release_date', so that
# column is NaN for rows built from the Spotify API.
DF_COLUMNS = ['valence', 'year', 'acousticness', 'artists', 'danceability',
              'duration_ms', 'energy', 'id', 'instrumentalness', 'key',
              'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',
              'speechiness', 'tempo']
# Not referenced by the functions in this module; presumably the feature
# columns the pipeline was fitted on -- TODO confirm against the training notebook.
DF_COLUMNS_MODEL = ['valence', 'year', 'acousticness', 'artists', 'danceability',
                    'duration_ms', 'energy', 'instrumentalness', 'key',
                    'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']

# Fitted pipeline; songs_recommendation() reads its "preprocessor", "scaler"
# and "kmeans" steps.
# NOTE(review): joblib.load unpickles the file -- only load artifacts you trust.
pipeline = joblib.load('model/pipeline.pkl')


# Read the Spotify credentials; the context manager closes the file handle
# as soon as the YAML has been parsed.
with open("spotify/spotify.yaml") as stream:
    spotify_details = yaml.safe_load(stream)
auth_manager = SpotifyClientCredentials(client_id=spotify_details['Client_id'],
                                        client_secret=spotify_details['client_secret'])

# Shared Spotify client used by get_song_spotify() and get_url().
sp = spotipy.client.Spotify(auth_manager=auth_manager)
30
+
31
def _log_error(message, path='log.txt'):
    """Append one error line to the log file and close the file again."""
    with open(path, 'a') as log:
        log.write(message + '\n')


def get_song_spotify(song_name, data):
    """Look a song up on Spotify and return its record as a pandas Series.

    The first search hit for ``song_name`` is used. If its track id is already
    present in ``data`` the matching dataset row is returned; otherwise the
    record is built from the Spotify audio features and track metadata.

    Parameters:
        song_name: free-text query passed to the Spotify search endpoint.
        data: DataFrame of known songs; its 'id' column is matched against
            the Spotify track id.

    Returns:
        A pandas Series for the song, or None when the search fails, yields
        no tracks, or the audio features cannot be retrieved. Records built
        from the API are indexed by DF_COLUMNS; 'release_date' is not
        populated for them and is therefore NaN.
    """
    try:
        result = sp.search(q=song_name, limit=1)
    except Exception:
        _log_error('Error: Failed to search song')
        return None

    items = result['tracks']['items']
    if not items:
        return None

    song = items[0]
    track_id = song['id']  # avoid shadowing the builtin ``id``

    # Prefer the local dataset row when the track is already known.
    try:
        return data[data['id'] == track_id].iloc[0]
    except (KeyError, IndexError):
        pass  # not in the dataset (or no 'id' column): build it from the API

    try:
        features = sp.audio_features(track_id)[0]
        song_data = {
            'valence': features['valence'],
            # Cast to int so the value matches the integer years in the dataset.
            'year': int(song['album']['release_date'][:4]),
            'acousticness': features['acousticness'],
            'artists': [artist['name'] for artist in song['artists']],
            'danceability': features['danceability'],
            'duration_ms': features['duration_ms'],
            'energy': features['energy'],
            'id': track_id,
            'instrumentalness': features['instrumentalness'],
            'key': features['key'],
            'liveness': features['liveness'],
            'loudness': features['loudness'],
            'mode': features['mode'],
            'name': song['name'],
            'popularity': song['popularity'],
            'speechiness': features['speechiness'],
            'tempo': features['tempo'],
        }
    except Exception:
        # Covers API failures as well as a missing/None feature payload.
        _log_error('Error: Failed to get audio features from Spotify')
        return None

    return pd.DataFrame([song_data], columns=DF_COLUMNS).iloc[0]
76
+
77
def get_song_data(song_name, data):
    """Return the record for ``song_name`` as a pandas Series.

    The first row of ``data`` whose 'name' equals ``song_name`` is returned.
    When there is no such row (or ``data`` has no 'name' column) the lookup
    falls back to get_song_spotify(), which may return None.
    """
    try:
        return data[data['name'] == song_name].iloc[0]
    except (KeyError, IndexError):
        # Only lookup misses trigger the fallback; other errors propagate.
        return get_song_spotify(song_name, data)
83
+
84
def df_song_data(list_song_name, data):
    """Collect the records of several songs into one DataFrame.

    Each name in ``list_song_name`` is resolved with get_song_data(). Songs
    that cannot be resolved (get_song_data() returned None) are skipped
    instead of being appended as an invalid row.

    Returns:
        A DataFrame with columns DF_COLUMNS and one row per resolved song;
        it is empty when no song could be resolved.
    """
    rows_song_data = []
    for song_name in list_song_name:
        row = get_song_data(song_name, data)
        if row is not None:
            rows_song_data.append(row)

    return pd.DataFrame(rows_song_data, columns=DF_COLUMNS)
90
+
91
def songs_recommendation(list_song_name, data, num_rec=10):
    """Recommend songs similar to the given input songs.

    The input songs are transformed with the pipeline's "preprocessor" and
    "scaler" steps and averaged into one vector. The "kmeans" step predicts
    that vector's cluster, and the rows of ``data`` carrying that cluster
    label are ranked by distance (scipy ``cdist`` default metric) to it.

    Parameters:
        list_song_name: names of the songs to base the recommendation on.
        data: DataFrame of candidate songs. It is masked with
            ``kmeans.labels_``, so it must line up row-for-row with the data
            the pipeline was fitted on.
        num_rec: maximum number of recommendations to return.

    Returns:
        ``(song_data_input, recsongs)``: two lists of record dicts, the
        resolved input songs and the recommended songs, nearest first.
    """
    # Resolve the input songs once; each lookup may hit the Spotify API.
    input_songs = df_song_data(list_song_name, data)
    song_data_input = input_songs.to_dict(orient='records')
    if input_songs.empty:
        return song_data_input, []

    preprocessor = pipeline.named_steps["preprocessor"]
    scaler = pipeline.named_steps["scaler"]
    kmeans = pipeline.named_steps["kmeans"]

    vector = scaler.transform(preprocessor.transform(input_songs))
    vector = vector.mean(axis=0)

    predicted_cluster = kmeans.predict([vector])
    cluster_data = data[kmeans.labels_ == predicted_cluster[0]]

    # Drop the input songs before ranking so that up to ``num_rec`` results
    # are returned, rather than ``num_rec`` minus the inputs that ranked nearest.
    candidates = cluster_data[~cluster_data['name'].isin(list_song_name)]
    if candidates.empty:
        return song_data_input, []

    vector_cluster = scaler.transform(preprocessor.transform(candidates))

    distance = cdist([vector], vector_cluster)
    index = list(np.argsort(distance[0])[:num_rec])

    recsongs = candidates.iloc[index].to_dict(orient='records')
    return song_data_input, recsongs
113
+
114
def get_url(id_track):
    """Fetch the cover image URL and preview URL of a Spotify track.

    Returns:
        On success a dict ``{"image": <first album image URL>,
        "sample": <track preview_url>}``. On any failure (API error, album
        without images, ...) a fallback image URL is returned as a plain
        string.

    NOTE(review): the success and failure return types differ (dict vs str);
    this is kept as-is for compatibility, so callers must handle both.
    """
    try:
        result = sp.track(id_track)
        return {"image": result['album']['images'][0]['url'], "sample": result['preview_url']}
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return 'https://i.scdn.co/image/ab67616d00004851f221ae4798e902bf102e1bd2'
120
+
121
+
122
# Load the cleaned dataset and parse the 'artists' column, which is stored as
# the string form of a Python list, back into real lists.
df = pd.read_csv('data/data_clean.csv')
df['artists'] = df['artists'].map(ast.literal_eval)
124
+
model/pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe2e2477452c9e98e13ece48cb8ff1c512d85371f33a557ead66b20c5b4a4cc
3
+ size 688202
recyclebin/ModellingPSD.ipynb ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 14,
6
+ "metadata": {
7
+ "id": "K6Ek26jtEbzM"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "import pandas as pd\n",
12
+ "import ast\n",
13
+ "\n",
14
+ "from sklearn.compose import ColumnTransformer\n",
15
+ "from sklearn.pipeline import Pipeline\n",
16
+ "from sklearn.preprocessing import StandardScaler\n",
17
+ "from sklearn.cluster import KMeans\n",
18
+ "from sklearn.feature_extraction import FeatureHasher"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 15,
24
+ "metadata": {
25
+ "id": "jSok_FhgFQPa"
26
+ },
27
+ "outputs": [],
28
+ "source": [
29
+ "df = pd.read_csv('data/data_clean.csv')"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 16,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "df['artists'] = df['artists'].apply(ast.literal_eval)"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 17,
44
+ "metadata": {
45
+ "colab": {
46
+ "base_uri": "https://localhost:8080/"
47
+ },
48
+ "id": "JHmNLOXKFqEE",
49
+ "outputId": "ab92c016-b009-4378-b320-995b268be64f"
50
+ },
51
+ "outputs": [
52
+ {
53
+ "data": {
54
+ "text/plain": [
55
+ "Index(['valence', 'year', 'acousticness', 'artists', 'danceability',\n",
56
+ " 'duration_ms', 'energy', 'explicit', 'id', 'instrumentalness', 'key',\n",
57
+ " 'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',\n",
58
+ " 'speechiness', 'tempo'],\n",
59
+ " dtype='object')"
60
+ ]
61
+ },
62
+ "execution_count": 17,
63
+ "metadata": {},
64
+ "output_type": "execute_result"
65
+ }
66
+ ],
67
+ "source": [
68
+ "df.columns"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 18,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "DF_COLUMNS = ['valence', 'year', 'acousticness', 'artists', 'danceability',\n",
78
+ " 'duration_ms', 'energy', 'id', 'instrumentalness', 'key',\n",
79
+ " 'liveness', 'loudness', 'mode', 'name', 'popularity',\n",
80
+ " 'speechiness', 'tempo']\n",
81
+ "DF_COLUMNS_MODEL = ['valence', 'year', 'acousticness', 'danceability',\n",
82
+ " 'duration_ms', 'energy', 'instrumentalness', 'key',\n",
83
+ " 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": null,
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": [
92
+ "\n",
93
+ "\n",
94
+ "# Column to be hashed\n",
95
+ "hash_columns = 'artists'\n",
96
+ "\n",
97
+ "# Initialize the FeatureHasher for the selected column\n",
98
+ "hasher = FeatureHasher(n_features=10, input_type=\"string\")\n",
99
+ "\n",
100
+ "# Initialize the StandardScaler\n",
101
+ "scaler = StandardScaler()\n",
102
+ "\n",
103
+ "# Initialize KMeans\n",
104
+ "kmeans = KMeans(n_clusters=10)\n",
105
+ "\n",
106
+ "# Define the transformer that hashes the selected column\n",
107
+ "hash_transformer = ('hasher', hasher, hash_columns)\n",
108
+ "\n",
109
+ "# Define the transformer for the columns that are not hashed\n",
110
+ "remainder_transformer = ('passthrough', 'remainder', slice(0, 0))\n",
111
+ "\n",
112
+ "# Combine the transformers for all columns\n",
113
+ "preprocessor = ColumnTransformer(\n",
114
+ " transformers=[hash_transformer], \n",
115
+ " remainder='passthrough')\n",
116
+ "\n",
117
+ "# Combine the preprocessor with the clustering algorithm (KMeans)\n",
118
+ "pipeline = Pipeline([\n",
119
+ " ('preprocessor', preprocessor),\n",
120
+ " ('scaler', scaler),\n",
121
+ " ('kmeans', kmeans)\n",
122
+ "])"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": null,
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "pipeline.fit(df[DF_COLUMNS_MODEL])"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": null,
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": [
140
+ "import yaml\n",
141
+ "import spotipy\n",
142
+ "from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials\n",
143
+ "\n",
144
+ "stream= open(\"streamlit/spotify/spotify.yaml\")\n",
145
+ "spotify_details = yaml.safe_load(stream)\n",
146
+ "auth_manager = SpotifyClientCredentials(client_id=spotify_details['Client_id'],\n",
147
+ " client_secret=spotify_details['client_secret'])\n",
148
+ "sp = spotipy.client.Spotify(auth_manager=auth_manager)"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": null,
154
+ "metadata": {},
155
+ "outputs": [],
156
+ "source": [
157
+ "def get_song_spotify(song_name,data):\n",
158
+ " log = open('log.txt','w')\n",
159
+ " try:\n",
160
+ " result = sp.search(q=song_name, limit=1)\n",
161
+ " except:\n",
162
+ " log.write('Error: Failed to search song')\n",
163
+ " return None\n",
164
+ " \n",
165
+ " if result['tracks']['items'] == []:\n",
166
+ " return None\n",
167
+ " \n",
168
+ " song = result['tracks']['items'][0]\n",
169
+ " \n",
170
+ " id = song['id']\n",
171
+ " \n",
172
+ " try:\n",
173
+ " song_data = data[(data['id'] == id)].iloc[0]\n",
174
+ " return song_data\n",
175
+ " except:\n",
176
+ " try:\n",
177
+ " audio_features = sp.audio_features(id)\n",
178
+ " song_data = {\n",
179
+ " 'valence': audio_features[0]['valence'],\n",
180
+ " \"year\" : result['tracks']['items'][0]['album']['release_date'][:4],\n",
181
+ " 'acousticness': audio_features[0]['acousticness'],\n",
182
+ " 'artists': list(map(lambda x: x['name'], result['tracks']['items'][0]['artists'])),\n",
183
+ " 'danceability': audio_features[0]['danceability'],\n",
184
+ " 'duration_ms': audio_features[0]['duration_ms'],\n",
185
+ " 'energy': audio_features[0]['energy'],\n",
186
+ " 'id': id,\n",
187
+ " 'instrumentalness': audio_features[0]['instrumentalness'],\n",
188
+ " 'key': audio_features[0]['key'],\n",
189
+ " 'liveness': audio_features[0]['liveness'],\n",
190
+ " 'loudness': audio_features[0]['loudness'],\n",
191
+ " 'mode': audio_features[0]['mode'],\n",
192
+ " 'name': result['tracks']['items'][0]['name'],\n",
193
+ " 'popularity': result['tracks']['items'][0]['popularity'],\n",
194
+ " 'speechiness': audio_features[0]['speechiness'],\n",
195
+ " 'tempo': audio_features[0]['tempo']\n",
196
+ " }\n",
197
+ " except:\n",
198
+ " log.write('Error: Failed to get audio features from Spotify')\n",
199
+ " return None\n",
200
+ " \n",
201
+ " return pd.DataFrame([song_data], columns=DF_COLUMNS)"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "metadata": {},
208
+ "outputs": [],
209
+ "source": [
210
+ "def get_song_data(song_name,data):\n",
211
+ " try:\n",
212
+ " song_data = data[(data['song_name'] == song_name)].iloc[0].to_df()\n",
213
+ " return song_data\n",
214
+ " except:\n",
215
+ " return get_song_spotify(song_name,data)\n",
216
+ " "
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ "metadata": {},
223
+ "outputs": [],
224
+ "source": [
225
+ "song_data = get_song_data('Shape of You',df)"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": null,
231
+ "metadata": {},
232
+ "outputs": [],
233
+ "source": [
234
+ "def df_song_data(list_song_name,data):\n",
235
+ " rows_song_data = pd.DataFrame(columns=DF_COLUMNS)\n",
236
+ " for song_name in list_song_name:\n",
237
+ " rows_song_data.(get_song_data(song_name,data))\n",
238
+ " \n",
239
+ " return pd.DataFrame(rows_song_data,columns=DF_COLUMNS)"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "metadata": {},
246
+ "outputs": [],
247
+ "source": [
248
+ "df_song_data(['Shape of You','Despacito'],df)"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": null,
254
+ "metadata": {},
255
+ "outputs": [],
256
+ "source": [
257
+ "vector = pipeline.named_steps[[\"preprocessor\",]].transform(df_song_data(['Shape of You','Despacito'],df))\n"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": null,
263
+ "metadata": {},
264
+ "outputs": [],
265
+ "source": [
266
+ "mean_vector = vector.mean(axis=0)"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": null,
272
+ "metadata": {},
273
+ "outputs": [],
274
+ "source": [
275
+ "predicted_cluster = kmeans.predict([mean_vector])"
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": null,
281
+ "metadata": {},
282
+ "outputs": [],
283
+ "source": [
284
+ "from scipy.spatial.distance import cdist\n",
285
+ "import numpy as np\n",
286
+ "\n",
287
+ "def songs_recommendation(list_song_name,data):\n",
288
+ "\n",
289
+ " vector = pipeline.named_steps[\"preprocessor\"].transform(df_song_data(list_song_name,df))\n",
290
+ " vector = pipeline.named_steps[\"scaler\"].transform(vector)\n",
291
+ "\n",
292
+ " vector = vector.mean(axis=0)\n",
293
+ "\n",
294
+ " predicted_cluster = kmeans.predict([vector])\n",
295
+ " \n",
296
+ " cluster_data = df[pipeline.named_steps[\"kmeans\"].labels_ == predicted_cluster[0]]\n",
297
+ " \n",
298
+ " vector_cluster = pipeline.named_steps[\"preprocessor\"].transform(cluster_data)\n",
299
+ " vector_cluster = pipeline.named_steps[\"scaler\"].transform(vector_cluster)\n",
300
+ " \n",
301
+ " distance = cdist([vector],vector_cluster)\n",
302
+ " index = list(np.argsort(distance)[:, :10][0])\n",
303
+ " \n",
304
+ " recsongs = cluster_data.iloc[index]\n",
305
+ " return recsongs[~recsongs['name'].isin(list_song_name)]"
306
+ ]
307
+ }
308
+ ],
309
+ "metadata": {
310
+ "colab": {
311
+ "provenance": []
312
+ },
313
+ "kernelspec": {
314
+ "display_name": "Python 3",
315
+ "name": "python3"
316
+ },
317
+ "language_info": {
318
+ "codemirror_mode": {
319
+ "name": "ipython",
320
+ "version": 3
321
+ },
322
+ "file_extension": ".py",
323
+ "mimetype": "text/x-python",
324
+ "name": "python",
325
+ "nbconvert_exporter": "python",
326
+ "pygments_lexer": "ipython3",
327
+ "version": "3.12.3"
328
+ }
329
+ },
330
+ "nbformat": 4,
331
+ "nbformat_minor": 0
332
+ }
recyclebin/check.ipynb ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import yaml\n",
10
+ "import spotipy\n",
11
+ "from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials\n",
12
+ "\n",
13
# Read the Spotify API credentials from the YAML file. The context manager
# closes the file handle as soon as parsing is done (the original left it open).
with open("streamlit/spotify/spotify.yaml") as stream:
    spotify_details = yaml.safe_load(stream)

# Client-credentials flow: authenticates the application itself, no user login.
auth_manager = SpotifyClientCredentials(
    client_id=spotify_details['Client_id'],
    client_secret=spotify_details['client_secret'],
)
sp = spotipy.client.Spotify(auth_manager=auth_manager)
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 2,
23
+ "metadata": {},
24
+ "outputs": [
25
+ {
26
+ "data": {
27
+ "text/plain": [
28
+ "[{'danceability': 0.808,\n",
29
+ " 'energy': 0.626,\n",
30
+ " 'key': 7,\n",
31
+ " 'loudness': -12.733,\n",
32
+ " 'mode': 1,\n",
33
+ " 'speechiness': 0.168,\n",
34
+ " 'acousticness': 0.00187,\n",
35
+ " 'instrumentalness': 0.159,\n",
36
+ " 'liveness': 0.376,\n",
37
+ " 'valence': 0.37,\n",
38
+ " 'tempo': 123.99,\n",
39
+ " 'type': 'audio_features',\n",
40
+ " 'id': '4JpKVNYnVcJ8tuMKjAj50A',\n",
41
+ " 'uri': 'spotify:track:4JpKVNYnVcJ8tuMKjAj50A',\n",
42
+ " 'track_href': 'https://api.spotify.com/v1/tracks/4JpKVNYnVcJ8tuMKjAj50A',\n",
43
+ " 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4JpKVNYnVcJ8tuMKjAj50A',\n",
44
+ " 'duration_ms': 535223,\n",
45
+ " 'time_signature': 4}]"
46
+ ]
47
+ },
48
+ "execution_count": 2,
49
+ "metadata": {},
50
+ "output_type": "execute_result"
51
+ }
52
+ ],
53
+ "source": []
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 17,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
def get_song_spotipy(song_name, data):
    """Look a song up on Spotify and return its feature record.

    The first Spotify search hit for ``song_name`` is taken. If a row with
    the same track id exists in ``data`` that row is returned; otherwise the
    record is built from the search result and Spotify's audio features.

    Parameters
    ----------
    song_name : str
        Free-text query passed to the Spotify search endpoint.
    data : pandas.DataFrame
        Local song table; the lookup compares its ``id`` column with the
        Spotify track id.

    Returns
    -------
    pandas.Series, dict or None
        A row of ``data`` when the track is already known locally, a dict
        with the same kind of keys when it had to be fetched, or ``None``
        when the search has no hit or the features cannot be retrieved.
    """
    results = sp.search(q=song_name, limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]
    song_id = track['id']

    # Prefer the local copy of the track when there is one.
    try:
        matches = data[data['id'] == song_id]
        if len(matches) > 0:
            return matches.iloc[0]
    except Exception:
        # Best effort, as in the original: any problem with the local table
        # falls through to the Spotify lookup. `Exception` (rather than a
        # bare except) lets KeyboardInterrupt / SystemExit propagate.
        pass

    feature_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration_ms', 'time_signature',
    )

    try:
        audio_features = sp.audio_features(song_id)
        # Guard against an empty reply or a None entry for the track.
        if not audio_features or audio_features[0] is None:
            return None
        features = audio_features[0]

        song_data = {
            'name': track['name'],
            'id': song_id,
            'artist_name': [artist['name'] for artist in track['artists']],
        }
        song_data.update((key, features[key]) for key in feature_keys)
        song_data['popularity'] = track['popularity']
        # First four characters of the release date; kept as a string.
        song_data["year"] = track['album']['release_date'][:4]
        return song_data
    except Exception:
        # Network/API failures or an unexpected payload: report "not found".
        return None
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 18,
106
+ "metadata": {},
107
+ "outputs": [
108
+ {
109
+ "data": {
110
+ "text/plain": [
111
+ "{'song_name': 'Cincin',\n",
112
+ " 'song_id': '7J0isBrUxhIYZVdrBOOlIh',\n",
113
+ " 'artist_name': ['Hindia'],\n",
114
+ " 'danceability': 0.531,\n",
115
+ " 'energy': 0.882,\n",
116
+ " 'key': 4,\n",
117
+ " 'loudness': -4.205,\n",
118
+ " 'mode': 0,\n",
119
+ " 'speechiness': 0.0559,\n",
120
+ " 'acousticness': 0.00445,\n",
121
+ " 'instrumentalness': 0,\n",
122
+ " 'liveness': 0.621,\n",
123
+ " 'valence': 0.476,\n",
124
+ " 'tempo': 155.024,\n",
125
+ " 'duration_ms': 266330,\n",
126
+ " 'time_signature': 4}"
127
+ ]
128
+ },
129
+ "execution_count": 18,
130
+ "metadata": {},
131
+ "output_type": "execute_result"
132
+ }
133
+ ],
134
+ "source": [
135
def get_song_data(song_name, data):
    """Return the record for ``song_name``, preferring the local table.

    The title is searched in ``data`` first; only when it is not found there
    is the Spotify lookup (``get_song_spotipy``) used.

    Parameters
    ----------
    song_name : str
        Exact song title to look for.
    data : pandas.DataFrame
        Local song table. The title column may be called ``song_name`` (the
        name the original code used) or ``name`` (the key used by the records
        ``get_song_spotipy`` builds); both are tried.

    Returns
    -------
    pandas.Series, dict or None
        The first matching row of ``data``, or whatever
        ``get_song_spotipy`` returns when there is no local match.
    """
    for column in ('song_name', 'name'):
        try:
            matches = data[data[column] == song_name]
        except (KeyError, TypeError):
            # Column missing (or `data` not subscriptable): try the next name
            # instead of hiding the problem behind a bare except.
            continue
        if len(matches) > 0:
            return matches.iloc[0]

    return get_song_spotipy(song_name, data)
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "metadata": {},
149
+ "outputs": [],
150
+ "source": [
151
def artists_hash(song_data):
    """Return the hashed representation of a song's artists.

    Parameters
    ----------
    song_data : mapping
        Song record. The artists are read from the ``artists`` key, falling
        back to ``artist_name``, which is the key used by records built in
        ``get_song_spotipy``.

    Returns
    -------
    The result of ``h.transform`` on the artists value.
    NOTE(review): ``h`` is not defined in this notebook as shown; presumably
    a fitted hasher/encoder created elsewhere - confirm.
    """
    try:
        artists = song_data['artists']
    except KeyError:
        artists = song_data['artist_name']
    # The original computed this value but never returned it.
    return h.transform(artists)
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
def get_mean_vector(song_list, data, feature_columns=None):
    """Average the numeric feature vectors of the given songs.

    NOTE(review): the original cell stopped at ``song_vector = `` (a syntax
    error). The completion below follows the function and variable names;
    confirm the intended feature set before relying on it.

    Parameters
    ----------
    song_list : iterable of str
        Song titles, resolved one by one with ``get_song_data``.
    data : pandas.DataFrame
        Local song table handed to ``get_song_data``.
    feature_columns : sequence of str, optional
        Keys read from every song record. Defaults to the audio-feature keys
        that ``get_song_spotipy`` copies from Spotify.

    Returns
    -------
    numpy.ndarray or None
        Element-wise mean of the song vectors, or ``None`` when no song
        could be resolved.
    """
    import numpy as np  # local import: this notebook does not import numpy

    if feature_columns is None:
        feature_columns = (
            'danceability', 'energy', 'key', 'loudness', 'mode',
            'speechiness', 'acousticness', 'instrumentalness', 'liveness',
            'valence', 'tempo', 'duration_ms', 'time_signature',
        )

    song_vectors_list = []

    for song in song_list:
        song_data = get_song_data(song, data)
        if song_data is None:
            # Unknown locally and on Spotify: skip it.
            continue

        song_vector = [float(song_data[column]) for column in feature_columns]
        song_vectors_list.append(song_vector)

    if not song_vectors_list:
        return None

    return np.mean(np.array(song_vectors_list), axis=0)
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": 3,
176
+ "metadata": {},
177
+ "outputs": [],
178
+ "source": [
179
+ "dict_contoh = {\n",
180
+ " 'song_name': 'song_name',\n",
181
+ " 'artists': 'artists',\n",
182
+ " 'danceability': 'danceability',\n",
183
+ " 'energy': 'energy',\n",
184
+ "}\n"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": 5,
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "ename": "KeyError",
194
+ "evalue": "('song_name', 'artists')",
195
+ "output_type": "error",
196
+ "traceback": [
197
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
198
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
199
+ "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdict_contoh\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43martists\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n",
200
+ "\u001b[0;31mKeyError\u001b[0m: ('song_name', 'artists')"
201
+ ]
202
+ }
203
+ ],
204
+ "source": [
205
+ "dict_contoh['song_name', 'artists']"
206
+ ]
207
+ }
208
+ ],
209
+ "metadata": {
210
+ "kernelspec": {
211
+ "display_name": ".env",
212
+ "language": "python",
213
+ "name": "python3"
214
+ },
215
+ "language_info": {
216
+ "codemirror_mode": {
217
+ "name": "ipython",
218
+ "version": 3
219
+ },
220
+ "file_extension": ".py",
221
+ "mimetype": "text/x-python",
222
+ "name": "python",
223
+ "nbconvert_exporter": "python",
224
+ "pygments_lexer": "ipython3",
225
+ "version": "3.12.3"
226
+ }
227
+ },
228
+ "nbformat": 4,
229
+ "nbformat_minor": 2
230
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ joblib==1.4.2
2
+ numpy==2.0.0
3
+ pandas==2.2.2
4
+ PyYAML==6.0.1
5
+ PyYAML==6.0.1
6
+ scipy==1.14.0
7
+ spotipy==2.24.0
spotify/spotify.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Spotify API credentials; check.ipynb reads the keys 'Client_id' and
# 'client_secret' from this file to build SpotifyClientCredentials.
# SECURITY(review): these appear to be real credentials committed in plain
# text. Rotate the secret and load it from an environment variable or a
# secret store instead of keeping it under version control.
+ Client_id : 598aa182877149449da2c07b959e006d
2
+ client_secret : 6553d92e14a140f69ec9bd816efa3719