# **Evaluating the Recommendation Model**

In [305]:
import gradio as gr
import torch
import torch.nn as nn
from joblib import load
import sklearn

In [306]:
user_preferences = pd.read_csv('user_preferences.zip')

In [307]:
# Define the same neural network model
class ImprovedSongRecommender(nn.Module):
 def __init__(self, input_size, num_titles):
 super(ImprovedSongRecommender, self).__init__()
 self.fc1 = nn.Linear(input_size, 128)
 self.bn1 = nn.BatchNorm1d(128)
 self.fc2 = nn.Linear(128, 256)
 self.bn2 = nn.BatchNorm1d(256)
 self.fc3 = nn.Linear(256, 128)
 self.bn3 = nn.BatchNorm1d(128)
 self.output = nn.Linear(128, num_titles)
 self.dropout = nn.Dropout(0.5)

 def forward(self, x):
 x = torch.relu(self.bn1(self.fc1(x)))
 x = self.dropout(x)
 x = torch.relu(self.bn2(self.fc2(x)))
 x = self.dropout(x)
 x = torch.relu(self.bn3(self.fc3(x)))
 x = self.dropout(x)
 x = self.output(x)
 return x

# Load the trained model
model_path = "improved_model.pth"
num_unique_titles = 4855

In [308]:
model = ImprovedSongRecommender(input_size=2, num_titles=num_unique_titles)
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
model.eval()

ImprovedSongRecommender(
 (fc1): Linear(in_features=2, out_features=128, bias=True)
 (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (fc2): Linear(in_features=128, out_features=256, bias=True)
 (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (fc3): Linear(in_features=256, out_features=128, bias=True)
 (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (output): Linear(in_features=128, out_features=4855, bias=True)
 (dropout): Dropout(p=0.5, inplace=False)
)

In [309]:
# Load the label encoders and scaler
label_encoders_path = "new_label_encoders.joblib"
scaler_path = "new_scaler.joblib"

label_encoders = load(label_encoders_path)
scaler = load(scaler_path)

# Create a mapping from encoded indices to actual song titles
index_to_song_title = {index: title for index, title in enumerate(label_encoders['title'].classes_)}


In [310]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import joblib
import re

# Function to clean tags and artist names
def clean_text(text):
 # Convert to lowercase
 text = text.lower()
 # Remove special characters and digits
 text = re.sub(r'[^a-zA-Z\s]', '', text)
 # Remove extra white spaces
 text = re.sub(r'\s+', ' ', text).strip()
 return text

columns_to_check = ['tags', 'artist', 'tags', 'song', 'listeners', 'playcount'] # Specify the columns you want to check for NaN values
user_preferences = user_preferences.dropna(subset=columns_to_check)


# Clean 'tags' and 'artist_name' columns
user_preferences['tags'] = user_preferences['tags'].apply(clean_text)
user_preferences['artist'] = user_preferences['artist'].apply(clean_text)

def label_encode_data(df):
 df = df.copy(deep=True)
 label_encoders = {}
 unknown_label = 'unknown' # Define an unknown label

 for column in ['tags', 'song', 'artist']:
 le = LabelEncoder()
 unique_categories = df[column].unique().tolist()
 unique_categories.append(unknown_label)
 le.fit(unique_categories)
 df[column] = le.transform(df[column].astype(str))
 label_encoders[column] = le

 return df, label_encoders

# Normalize numerical features
scaler = MinMaxScaler()
user_preferences[['listeners', 'playcount']] = scaler.fit_transform(user_preferences[['listeners', 'playcount']])

# Label encode categorical features
df_scaled, label_encoders = label_encode_data(user_preferences.loc[:, ['tags', 'artist', 'listeners', 'playcount', 'song']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 user_preferences['tags'] = user_preferences['tags'].apply(clean_text)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 user_preferences['artist'] = user_preferences['artist'].apply(clean_text)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 user_preferences[['listeners', 'playcount']] = scaler.fit_transf

In [311]:
from sklearn.model_selection import train_test_split

In [312]:
# Split data into features and target
X = df_scaled[['tags', 'artist']]
y = df_scaled['song']

# Split the dataset into training and testing sets
X_valid, X_test, y_valid, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Data split into validation and testing sets.")

Data split into validation and testing sets.


In [313]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import accuracy_score

In [314]:
valid_loader = DataLoader(list(zip(X_valid.values.astype(float), y_valid)), batch_size=1, shuffle=True)
test_loader = DataLoader(list(zip(X_test.values.astype(float), y_test)), batch_size=1, shuffle=False)


In [315]:
valid_accuracy = 0
test_accuracy = 0
for features, labels in valid_loader:
 preds = model(features.float().detach())

 # Get the predicted class (the one with the highest score)
 _, predicted_class = torch.max(preds, 1)

 # Convert to numpy arrays
 predicted_class_np = predicted_class.numpy()
 labels_np = labels.numpy()

 # Calculate accuracy
 accuracy = accuracy_score(labels_np, predicted_class_np)
 valid_accuracy += accuracy

for features, labels in test_loader:
 preds = model(features.float())
 # Get the predicted class (the one with the highest score)
 _, predicted_class = torch.max(preds, 1)

 # Convert to numpy arrays
 predicted_class_np = predicted_class.numpy()
 labels_np = labels.numpy()

 # Calculate accuracy
 accuracy = accuracy_score(labels_np, predicted_class_np)
 test_accuracy += accuracy

In [316]:
print('The loss of the model on the unseen validation dataset is: ', valid_accuracy)
print('The loss of the model on the unseen test dataset is: ', test_accuracy)

The loss of the model on the unseen validation dataset is: 2.0
The loss of the model on the unseen test dataset is: 0.0
