# Spaces:
# Runtime error
# Runtime error
# -*- coding: utf-8 -*-
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
"""
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering | |
from surprise.model_selection import cross_validate | |
from surprise import Reader, Dataset | |
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import scipy as sp | |
from scipy import sparse | |
from datetime import datetime | |
# Load the critic reviews; lines the CSV parser cannot handle are skipped.
df = pd.read_csv(
    "/content/metacritic_critic_reviews.csv",
    on_bad_lines='skip',
    encoding="utf-8",
)
# Discard every row that has at least one missing value.
df = df.dropna()
# Notebook-style preview; the result is discarded when run as a script.
df.head()
def add_year(full_date: str) -> int:
    """Return the calendar year of a date written like ``'Mar 24, 2015'``.

    The text is parsed with the format ``'%b %d, %Y'`` (abbreviated month
    name, day of month, four-digit year); ``datetime.strptime`` raises
    ``ValueError`` when the text does not match that format.
    """
    return datetime.strptime(full_date, '%b %d, %Y').year
# Derive a 'year' column by parsing each review's 'date' text.
df['year'] = df['date'].map(add_year)
def year_game(row):
    """Return the row's game title followed by its year in parentheses.

    ``row`` is anything indexable by the keys ``'game'`` and ``'year'``
    (for example a DataFrame row). Both values are converted with ``str``,
    giving text such as ``'Some Game (2015)'`` so that titles sharing a
    name can be told apart by year.
    """
    title = row['game']
    calendar_year = row['year']
    return f"{title!s} ({calendar_year!s})"
# Replace each title with "<title> (<year>)" to avoid confusion between
# games that share a name.
df['game'] = df.apply(year_game, axis=1)

# Keep the reviews of one platform only. Platform names noted by the author:
# ['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4', 'Xbox One', 'Switch']
is_ps4 = df['platform'] == 'PlayStation 4'
df = df[is_ps4]

# Reviews from 2015 onwards.
over_2015 = df[df['year'] >= 2015]

# Mean score per game, best first.
mean_scores = over_2015.groupby('game')['score'].mean()
top_recent_scorers = mean_scores.sort_values(ascending=False)

# Titles of the 40 best-scoring recent games.
top_40_games = top_recent_scorers.head(40).index

# The three columns the recommender works with: game, reviewer name, score.
combined_games_data = df.loc[:, ['game', 'name', 'score']]

# One instance of every Surprise algorithm.
algorithms = [NMF(), SVD(), SVDpp(), KNNWithZScore(), CoClustering()]
def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data):
    """Recommend ten games the interactive user has not rated yet.

    The user's non-zero scores are appended to the critic reviews, the chosen
    Surprise algorithm is fitted on the combined ratings, and every game the
    user has not scored is ranked by its predicted rating.

    Args:
        user_prof: DataFrame with the columns ``game``, ``name`` and ``score``
            (the same columns as ``combined_games_data``). A score of 0 means
            "not rated"; blank or non-numeric scores are ignored as well.
        user_algo: Either an algorithm name ('NMF', 'SVD', 'SVDpp', 'KNN' or
            'CoClustering') or an already constructed Surprise algorithm
            instance. ``None`` or an unknown name falls back to NMF. The
            default instance is created once, when the function is defined,
            and is simply fitted again on every call that uses it.
        combined_games_data: DataFrame of existing reviews with the columns
            ``game``, ``name`` and ``score``. It is not modified.

    Returns:
        DataFrame with the columns ``iid`` (game) and ``predictions``
        (estimated rating), holding the ten highest predictions in
        descending order.
    """
    # The selection used to compare a loop index (an int) with these names,
    # so it never matched and NMF was always used. Look the name up directly.
    algorithm_factories = {
        'NMF': NMF,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'KNN': KNNWithZScore,
        'CoClustering': CoClustering,
    }
    if user_algo is None or isinstance(user_algo, str):
        algo = algorithm_factories.get(user_algo, NMF)()
    else:
        algo = user_algo

    # Id under which the interactive user's ratings are stored.
    # NOTE(review): assumes the profile rows carry the integer 1001 in their
    # 'name' column; if the UI delivers it as text the user's rows will not
    # match below - confirm against the interface definition.
    user_id = 1001

    # Scores may arrive as text from an editable table; make them numeric and
    # keep only the rows the user actually rated.
    scores = pd.to_numeric(user_prof['score'], errors='coerce')
    rated = scores.notna() & (scores != 0)
    my_ratings = user_prof[rated].assign(score=scores[rated])

    # pd.concat builds a new frame, so renaming its columns leaves the
    # caller's data untouched.
    ratings = pd.concat([combined_games_data, my_ratings], axis=0)
    ratings.columns = ['itemID', 'userID', 'rating']

    # Keep only games that have at least as many reviews as the number of
    # games the user rated.
    ratings['reviews'] = ratings.groupby('itemID')['rating'].transform('count')
    enough_reviews = ratings['reviews'] >= len(my_ratings)
    ratings = ratings.loc[enough_reviews, ['userID', 'itemID', 'rating']]

    reader = Reader(rating_scale=(1.0, 100.0))
    data = Dataset.load_from_df(ratings, reader)
    algo.fit(data.build_full_trainset())

    # Predict only for games the user has not rated.
    already_rated = ratings.loc[ratings['userID'] == user_id, 'itemID']
    games_to_predict = np.setdiff1d(ratings['itemID'].unique(), already_rated)

    my_recs = [
        (iid, algo.predict(uid=user_id, iid=iid).est)
        for iid in games_to_predict
    ]
    result = pd.DataFrame(my_recs, columns=['iid', 'predictions'])
    return result.sort_values('predictions', ascending=False).head(10)
# One starter row per top game: [game title, user id 1001, score 0].
default_entries = [[game_title, 1001, 0] for game_title in top_40_games]
# Editable ratings table, pre-filled with default_entries; it becomes the
# first argument of recommender.
profile_input = gr.inputs.Dataframe(
    headers=['game', 'name', 'score'],
    default=default_entries,
)
# Algorithm choice; it becomes the second argument of recommender.
algorithm_input = gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering'])

iface = gr.Interface(
    fn=recommender,
    inputs=[profile_input, algorithm_input],
    outputs="dataframe",
)
iface.launch(debug=True)