# Spaces: Runtime error
# File size: 3,842 Bytes
# 3fbcf3c
# -*- coding: utf-8 -*-
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
"""
!pip install surprise
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
from surprise.model_selection import cross_validate
from surprise import Reader, Dataset
!pip install gradio
import gradio as gr
import pandas as pd
import numpy as np
import scipy as sp
from scipy import sparse
from datetime import datetime
# Load the critic-review CSV, skipping malformed lines, and discard every row
# that has a missing value in any column.
df = pd.read_csv(
    "/content/metacritic_critic_reviews.csv",
    encoding="utf-8",
    on_bad_lines='skip',
).dropna()
df.head()  # notebook preview; the result is discarded when run as a script
def add_year(full_date: str) -> int:
    """Return the calendar year contained in a review date string.

    Args:
        full_date: Date text in the ``'%b %d, %Y'`` layout, e.g.
            ``'Jan 5, 2018'``. Leading and trailing whitespace is ignored.

    Returns:
        The year as an ``int``.

    Raises:
        ValueError: If the text does not match the expected layout.
    """
    # strptime rejects stray whitespace around the value, so trim it first.
    return datetime.strptime(full_date.strip(), '%b %d, %Y').year
# Derive a numeric 'year' column from each review's textual 'date'.
df['year'] = df['date'].map(add_year)
def year_game(row) -> str:
    """Return the game title followed by its year in parentheses.

    Appending the year keeps titles that share a name distinguishable.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            ``'game'`` and ``'year'`` entries.

    Returns:
        Text of the form ``'<game> (<year>)'``.
    """
    return f"{row['game']} ({row['year']})"
# Rewrite every title as "<game> (<year>)".
df['game'] = df.apply(year_game, axis=1)

# Keep a single platform. Values seen in the data:
# ['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4', 'Xbox One', 'Switch']
df = df.loc[df['platform'] == 'PlayStation 4']

# Reviews dated 2015 or later.
over_2015 = df.loc[df['year'] >= 2015]

# Mean critic score per game, best first.
top_recent_scorers = (
    over_2015.groupby('game')['score']
    .mean()
    .sort_values(ascending=False)
)

# Titles of the 40 best-scoring recent games.
top_40_games = top_recent_scorers.head(40).index

df.shape  # notebook leftover; the value is discarded when run as a script

# The three columns the recommender consumes: item, reviewer, rating.
combined_games_data = df.loc[:, ['game', 'name', 'score']]

algorithms = [NMF(), SVD(), SVDpp(), KNNWithZScore(), CoClustering()]
def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data):
    """Return the ten games with the highest predicted rating for user 1001.

    The user's own ratings are appended to the critic reviews, a Surprise
    model is fitted on the result, and a rating is predicted for every
    remaining game the user has not rated.

    Args:
        user_prof: DataFrame with ``'game'``, ``'name'`` and ``'score'``
            columns. Rows whose score is 0 count as "not rated" and are
            ignored. Predictions are made for user id 1001, so the ``'name'``
            column is expected to hold that id.
        user_algo: Either one of the labels ``'NMF'``, ``'SVD'``, ``'SVDpp'``,
            ``'KNN'`` or ``'CoClustering'``, or an already constructed
            Surprise algorithm instance. ``None`` or an unrecognised label
            falls back to ``NMF``.
        combined_games_data: Critic reviews as a three-column DataFrame in
            the order game, reviewer name, score.

    Returns:
        DataFrame with columns ``'iid'`` (game) and ``'predictions'``
        (estimated rating), sorted best first and limited to 10 rows.
    """
    target_user = 1001

    # Map the UI labels to algorithm classes. The previous loop compared an
    # integer index with these labels, so the selection never matched and NMF
    # was always used.
    algo_by_label = {
        'NMF': NMF,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'KNN': KNNWithZScore,
        'CoClustering': CoClustering,
    }
    if isinstance(user_algo, str):
        algo = algo_by_label.get(user_algo, NMF)()
    elif user_algo is None:
        algo = NMF()
    else:
        algo = user_algo

    my_ratings = user_prof[user_prof['score'] != 0]
    ratings = pd.concat([combined_games_data, my_ratings], axis=0, ignore_index=True)
    ratings.columns = ['itemID', 'userID', 'rating']

    # Count the reviews per game (itemID) and keep only games that have at
    # least as many reviews as the user supplied ratings.
    ratings['reviews'] = ratings.groupby('itemID')['rating'].transform('count')
    ratings = ratings.loc[
        ratings['reviews'] >= len(my_ratings),
        ['userID', 'itemID', 'rating'],
    ]

    reader = Reader(rating_scale=(1.0, 100.0))
    data = Dataset.load_from_df(ratings, reader)

    # Candidates are all remaining games the target user has not rated.
    rated_by_user = ratings.loc[ratings['userID'] == target_user, 'itemID']
    games_to_predict = np.setdiff1d(ratings['itemID'].unique(), rated_by_user)

    algo.fit(data.build_full_trainset())
    my_recs = [
        (iid, algo.predict(uid=target_user, iid=iid).est)
        for iid in games_to_predict
    ]
    return (
        pd.DataFrame(my_recs, columns=['iid', 'predictions'])
        .sort_values('predictions', ascending=False)
        .head(10)
    )
# Pre-fill the input table with the 40 top-scoring recent games, each
# attributed to user id 1001 with a score of 0 (recommender ignores rows
# whose score is 0).
default_entries = [[game, 1001, 0] for game in top_40_games]

# Two inputs: the editable ratings table and the algorithm choice.
# The recommendations come back as a dataframe.
iface = gr.Interface(
    recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game', 'name', 'score'],
            default=default_entries,
        ),
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
)
iface.launch(debug=True)