seyia92coding's picture
Upload app.py
3fbcf3c
raw
history blame
3.84 kB
# -*- coding: utf-8 -*-
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
"""
!pip install surprise
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
from surprise.model_selection import cross_validate
from surprise import Reader, Dataset
!pip install gradio
import gradio as gr
import pandas as pd
import numpy as np
import scipy as sp
from scipy import sparse
from datetime import datetime
# Load the Metacritic critic reviews; malformed CSV rows are skipped rather
# than aborting the load.
df = pd.read_csv(
    "/content/metacritic_critic_reviews.csv",
    on_bad_lines='skip',
    encoding="utf-8",
)
# Discard every review row that has at least one missing field.
df = df.dropna()
# Notebook-style preview; outside a notebook the returned frame is discarded.
df.head()
#Create a year column by parsing each date string into a datetime object and keeping only the year
def add_year(full_date):
    """Return the calendar year of a date string formatted like 'Jan 05, 2020'.

    The string is parsed with the '%b %d, %Y' pattern (abbreviated month name,
    day of month, four-digit year); a string that does not match raises
    ``ValueError`` from ``datetime.strptime``.
    """
    return datetime.strptime(full_date, '%b %d, %Y').year
# Derive the review year from each row's 'date' string, element by element.
df['year'] = df['date'].map(add_year)
#Add the year in brackets to the name of the game to avoid confusion
def year_game(row):
    """Return the row's game title followed by its year in parentheses.

    *row* is indexed with the keys 'game' and 'year'; both values are
    converted with ``str`` before being joined, e.g. ``"Title (2018)"``.
    """
    return f"{row['game']!s} ({row['year']!s})"
# Replace each title with "title (year)" so same-named games stay distinct.
df['game'] = df.apply(year_game, axis=1)

# Restrict the data to one platform. Platform names listed by the author:
# ['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4', 'Xbox One', 'Switch']
df = df.loc[df['platform'] == 'PlayStation 4']

# Reviews dated 2015 or later.
over_2015 = df.loc[df['year'] >= 2015]

# Mean critic score per game, best first.
top_recent_scorers = (
    over_2015
    .groupby('game')['score']
    .mean()
    .sort_values(ascending=False)
)

# Titles of the 40 best-scoring recent games.
top_40_games = top_recent_scorers.head(40).index
#top_100_games = top_recent_scorers.index[:100]

# Notebook-style inspection; has no effect when run as a script.
df.shape

# Ratings table passed to the recommender: game title, reviewer name, score.
combined_games_data = df.loc[:, ['game', 'name', 'score']]

# One instance of each Surprise algorithm.
algorithms = [
    NMF(),
    SVD(),
    SVDpp(),
    KNNWithZScore(),
    CoClustering(),
]
def _resolve_algorithm(user_algo):
    """Turn the caller's algorithm choice into a Surprise algorithm instance.

    A string is looked up among the names 'NMF', 'SVD', 'SVDpp', 'KNN' and
    'CoClustering' and a fresh instance of the matching class is returned.
    An object that already exposes ``fit`` and ``predict`` is returned
    unchanged. Anything else (unknown name, ``None``, ...) falls back to
    ``NMF()``, which is the fallback the original selection code used.
    """
    factories = {
        'NMF': NMF,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'KNN': KNNWithZScore,
        'CoClustering': CoClustering,
    }
    if isinstance(user_algo, str):
        return factories.get(user_algo, NMF)()
    if hasattr(user_algo, 'fit') and hasattr(user_algo, 'predict'):
        return user_algo
    return NMF()


def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data):  # top_100_games=top_100_games,
    """Recommend the ten games with the highest predicted score for user 1001.

    Args:
        user_prof: DataFrame of the user's own ratings. It is concatenated
            with ``combined_games_data``, so it must carry the same columns;
            rows whose 'score' equals 0 are ignored.
        user_algo: Either an algorithm name ('NMF', 'SVD', 'SVDpp', 'KNN',
            'CoClustering') or a Surprise algorithm instance. Unrecognised
            values fall back to NMF. The default instance is created once,
            when the function is defined, and is refit on every call.
        combined_games_data: Three-column ratings DataFrame whose columns are,
            in order, the item, the user and the rating.

    Returns:
        DataFrame with columns 'iid' and 'predictions' holding at most ten
        rows, sorted by predicted rating in descending order.
    """
    # The profile rows are expected to carry this id in their user column.
    # NOTE(review): assumes 'score' and the user id arrive as numbers, not
    # strings - confirm what the UI table actually delivers.
    user_id = 1001

    # A score of 0 marks a game the user did not rate.
    my_ratings = user_prof[user_prof['score'] != 0]

    ratings = pd.concat([combined_games_data, my_ratings], axis=0)
    ratings.columns = ['itemID', 'userID', 'rating']

    # Count the reviews of each game (grouped by itemID) and keep only games
    # with at least as many reviews as the user supplied ratings.
    # NOTE(review): the original comment suggests a fixed threshold of about
    # 20 reviews may have been intended - confirm.
    ratings['reviews'] = ratings.groupby(['itemID'])['rating'].transform('count')
    enough_reviews = ratings.reviews >= my_ratings.shape[0]
    # Surprise's load_from_df expects the columns in user, item, rating order.
    ratings = ratings[enough_reviews][['userID', 'itemID', 'rating']]

    reader = Reader(rating_scale=(1.0, 100.0))
    data = Dataset.load_from_df(ratings, reader)

    # Predict only for games the user has not rated yet.
    unique_ids = ratings['itemID'].unique()
    already_rated = ratings.loc[ratings['userID'] == user_id, 'itemID']
    games_to_predict = np.setdiff1d(unique_ids, already_rated)

    # Previously the choice was compared against loop indices (integers), so
    # no name ever matched and NMF was always used; resolve it by name instead.
    algo = _resolve_algorithm(user_algo)
    algo.fit(data.build_full_trainset())

    my_recs = [
        (iid, algo.predict(uid=user_id, iid=iid).est)
        for iid in games_to_predict
    ]
    result = (
        pd.DataFrame(my_recs, columns=['iid', 'predictions'])
        .sort_values('predictions', ascending=False)
        .head(10)
    )
    return result
# Pre-filled rows for the profile table: one per top game, attributed to
# user id 1001 with a score of 0.
default_entries = [[title, 1001, 0] for title in top_40_games]
# Web UI: an editable ratings table plus an algorithm selector feed
# `recommender`; its returned DataFrame is rendered as the output table.
# NOTE(review): the `gr.inputs` namespace is deprecated in newer Gradio
# releases - confirm the installed version still provides it.
iface = gr.Interface(
    fn=recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game', 'name', 'score'],
            default=default_entries,
        ),
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
)

# Start the app server with debug output enabled.
iface.launch(debug=True)