"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
"""
|
|
|
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering |
|
from surprise.model_selection import cross_validate |
|
from surprise import Reader, Dataset |
|
|
|
import gradio as gr |
|
|
|
import pandas as pd |
|
import numpy as np |
|
import scipy as sp |
|
from scipy import sparse |
|
from datetime import datetime |
|
|
|
# Read the Metacritic critic reviews; malformed CSV rows are skipped
# instead of aborting the load.
df = pd.read_csv(
    "metacritic_critic_reviews.csv",
    encoding="utf-8",
    on_bad_lines='skip',
)

# Discard every row that has a missing value in any column.
df = df.dropna()

# Notebook leftover: previews the first rows (the result is discarded when
# this runs as a plain script).
df.head()
|
|
|
|
|
def add_year(full_date):
    """Return the calendar year of a date string.

    Args:
        full_date: Date text in the ``'%b %d, %Y'`` layout, for example
            ``"Jan 5, 2020"``.

    Returns:
        The year as an ``int``.

    Raises:
        ValueError: If ``full_date`` does not match the expected layout.
    """
    return datetime.strptime(full_date, '%b %d, %Y').year
|
|
|
# Derive the review year from each row's textual 'date' value.
df['year'] = df['date'].map(add_year)
|
|
|
def year_game(row):
    """Build a ``"<game> (<year>)"`` label for one review row.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing the
            ``'game'`` and ``'year'`` keys.

    Returns:
        The game title followed by the year in parentheses, as a string.
    """
    # !s forces str() on both values, so non-string cells are handled too.
    return f"{row['game']!s} ({row['year']!s})"
|
|
|
# Make titles unique per release year by rewriting 'game' as "<game> (<year>)".
df['game'] = df.apply(year_game, axis=1)

# Keep only the PlayStation 4 reviews.
df = df.loc[df['platform'] == 'PlayStation 4']

# Reviews dated 2015 or later.
over_2015 = df.loc[df['year'] >= 2015]

# Mean critic score per game among those recent reviews, best first.
top_recent_scorers = (
    over_2015.groupby('game')['score']
    .mean()
    .sort_values(ascending=False)
)

# Titles of the 40 best-scoring recent games.
top_40_games = top_recent_scorers.index[:40]

# (game, reviewer name, score) triples used as the rating data.
combined_games_data = df[['game', 'name', 'score']]

# One default-configured instance of each Surprise algorithm.
algorithms = [
    algo_cls()
    for algo_cls in (NMF, SVD, SVDpp, KNNWithZScore, CoClustering)
]
|
|
|
def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data):
    """Recommend the ten games with the highest predicted score for the user.

    The user's non-zero ratings are appended to the critic ratings, the
    selected Surprise algorithm is fitted on the combined data, and a score
    is predicted for every remaining game the user has not rated.

    Args:
        user_prof: DataFrame with the columns ``'game'``, ``'name'`` and
            ``'score'`` (same layout as ``combined_games_data``). Rows whose
            score is 0 or not numeric are treated as "not rated".
        user_algo: Either one of the labels ``'NMF'``, ``'SVD'``, ``'SVDpp'``,
            ``'KNN'``, ``'CoClustering'``, or an already constructed Surprise
            algorithm instance. ``None`` or an unknown label falls back to
            NMF. When omitted, the default instance is reused across calls
            and simply re-fitted each time.
        combined_games_data: DataFrame of existing ratings with the columns
            ``'game'``, ``'name'``, ``'score'`` in that order.

    Returns:
        DataFrame with the columns ``'iid'`` and ``'predictions'`` holding at
        most ten rows, sorted by predicted score in descending order.
    """
    # User ID under which the profile rows are expected to be filed (the
    # 'name' column of user_prof) and for which predictions are made.
    user_id = 1001

    # Label -> algorithm class. Dispatching on the label itself is what makes
    # the caller's choice effective; a fresh instance is built per call.
    algo_by_name = {
        'NMF': NMF,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'KNN': KNNWithZScore,
        'CoClustering': CoClustering,
    }
    if user_algo is None or isinstance(user_algo, str):
        algo = algo_by_name.get(user_algo, NMF)()
    else:
        algo = user_algo

    # Coerce scores to numbers so text cells coming from an editable table
    # cannot leak into the rating data; unparsable cells become NaN.
    scores = pd.to_numeric(user_prof['score'], errors='coerce')
    my_ratings = user_prof.assign(score=scores)
    my_ratings = my_ratings[scores.notna() & (scores != 0)]

    # ignore_index avoids duplicate index labels in the combined frame.
    ratings = pd.concat([combined_games_data, my_ratings], axis=0, ignore_index=True)
    ratings.columns = ['itemID', 'userID', 'rating']

    # Keep only games that have at least as many ratings as the user supplied.
    ratings['reviews'] = ratings.groupby('itemID')['rating'].transform('count')
    ratings = ratings.loc[
        ratings['reviews'] >= len(my_ratings),
        ['userID', 'itemID', 'rating'],
    ]

    reader = Reader(rating_scale=(1.0, 100.0))
    data = Dataset.load_from_df(ratings, reader)
    algo.fit(data.build_full_trainset())

    # Candidate games: everything in the filtered data the user has not rated.
    all_items = ratings['itemID'].unique()
    rated_items = ratings.loc[ratings['userID'] == user_id, 'itemID']
    games_to_predict = np.setdiff1d(all_items, rated_items)

    my_recs = [
        (iid, algo.predict(uid=user_id, iid=iid).est)
        for iid in games_to_predict
    ]

    result = (
        pd.DataFrame(my_recs, columns=['iid', 'predictions'])
        .sort_values('predictions', ascending=False)
        .head(10)
    )
    return result
|
|
|
# Initial table rows for the UI: one [game, user id 1001, score 0] entry per
# top-40 title, in the same order as top_40_games.
default_entries = [[title, 1001, 0] for title in top_40_games]
|
|
|
# Web UI: an editable ratings table plus an algorithm selector feed
# `recommender`; its result is rendered as a dataframe.
iface = gr.Interface(
    fn=recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game', 'name', 'score'],
            default=default_entries,
        ),
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
    title="Recommendation Engine for Video Games using Surprise",
    description=(
        "Below is a dataframe of 40 games. "
        "Please rate as many as possible so the algorithm can predict the "
        "recommendations based on your previous game ratings. "
        "Do not edit any other cells beside the score column."
    ),
)

# Start the app with debug mode enabled.
iface.launch(debug=True)