# -*- coding: utf-8 -*-
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi

Builds a small game recommender on top of Metacritic critic reviews using the
``surprise`` library and exposes it through a Gradio interface.
"""

# Notebook-only setup: "!pip install ..." is IPython shell syntax and a
# SyntaxError in a plain .py file, so the commands are kept as comments.
# Run them in a notebook cell (or install the packages beforehand).
# !pip install surprise
# !pip install gradio

from datetime import datetime

import gradio as gr
import numpy as np
import pandas as pd
import scipy as sp
from scipy import sparse
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
from surprise import Reader, Dataset
from surprise.model_selection import cross_validate

# User id under which the interactive profile's ratings are stored and for
# which predictions are requested.
PROFILE_USER_ID = 1001

# Maps the labels offered by the Radio input to the surprise algorithm class.
ALGORITHM_FACTORIES = {
    'NMF': NMF,
    'SVD': SVD,
    'SVDpp': SVDpp,
    'KNN': KNNWithZScore,
    'CoClustering': CoClustering,
}

df = pd.read_csv("/content/metacritic_critic_reviews.csv", on_bad_lines='skip', encoding="utf-8")
df.dropna(inplace=True)


def add_year(full_date):
    """Return the year of a date string formatted like ``'Jan 05, 2018'``."""
    return datetime.strptime(full_date, '%b %d, %Y').year


# Create a year column from the review date.
df['year'] = df['date'].apply(add_year)


def year_game(row):
    """Return the row's game name followed by its year in brackets."""
    return f"{row['game']} ({row['year']})"


# Add the year in brackets to the name of the game to avoid confusion.
df['game'] = df.apply(year_game, axis=1)

# Available platforms:
# ['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4', 'Xbox One', 'Switch']
df = df[df['platform'] == 'PlayStation 4']

# Keep games from 2015 onwards.
over_2015 = df[df['year'] >= 2015]

# Average score per game, highest first.
top_recent_scorers = over_2015.groupby('game')['score'].mean().sort_values(ascending=False)

# Only the top 40 games are offered in the default profile table.
top_40_games = top_recent_scorers.index[:40]
#top_100_games = top_recent_scorers.index[:100]

combined_games_data = df[['game', 'name', 'score']]

# Kept for backward compatibility with code that may reference this name;
# ``recommender`` itself resolves algorithms through ALGORITHM_FACTORIES.
algorithms = [NMF(), SVD(), SVDpp(), KNNWithZScore(), CoClustering()]


def _resolve_algorithm(user_algo):
    """Turn a Radio label, an algorithm instance, or ``None`` into an algorithm.

    ``None`` yields a fresh ``KNNWithZScore``. An unrecognised label falls
    back to ``NMF``. Any non-string value is assumed to already be a surprise
    algorithm instance and is returned unchanged.
    """
    if user_algo is None:
        return KNNWithZScore()
    if isinstance(user_algo, str):
        return ALGORITHM_FACTORIES.get(user_algo, NMF)()
    return user_algo


def recommender(user_prof, user_algo=None, combined_games_data=combined_games_data):
    """Recommend the ten games with the highest predicted rating for the profile.

    Args:
        user_prof: DataFrame with ``game``, ``name`` and ``score`` columns
            holding the profile's ratings; rows whose score is 0 (or not
            numeric) are treated as "not rated" and ignored.
        user_algo: Label from the Radio input (``'NMF'``, ``'SVD'``,
            ``'SVDpp'``, ``'KNN'``, ``'CoClustering'``), a surprise algorithm
            instance, or ``None`` for a fresh ``KNNWithZScore``.
        combined_games_data: DataFrame of existing ratings whose three
            columns are, in order, game, reviewer name and score.

    Returns:
        DataFrame with columns ``iid`` and ``predictions`` containing the ten
        highest predictions, sorted in descending order.
    """
    profile = user_prof[['game', 'name', 'score']].copy()
    # Cells coming back from the UI table may be text; coerce so the "!= 0"
    # filter compares numbers rather than strings.
    profile['score'] = pd.to_numeric(profile['score'], errors='coerce')
    my_ratings = profile[profile['score'].notna() & (profile['score'] != 0)]

    ratings = pd.concat([combined_games_data, my_ratings], axis=0)
    ratings.columns = ['itemID', 'userID', 'rating']

    # Count reviews per game and keep only the games that have at least as
    # many reviews as the profile has rated games. Ideally 20 or more.
    ratings['reviews'] = ratings.groupby(['itemID'])['rating'].transform('count')
    ratings = ratings[ratings.reviews >= my_ratings.shape[0]][['userID', 'itemID', 'rating']]

    reader = Reader(rating_scale=(1.0, 100.0))
    data = Dataset.load_from_df(ratings, reader)

    # Predict only for games the profile user has not rated yet.
    all_games = ratings['itemID'].unique()
    rated_games = ratings.loc[ratings['userID'] == PROFILE_USER_ID, 'itemID']
    games_to_predict = np.setdiff1d(all_games, rated_games)

    algo = _resolve_algorithm(user_algo)
    algo.fit(data.build_full_trainset())

    my_recs = [
        (iid, algo.predict(uid=PROFILE_USER_ID, iid=iid).est)
        for iid in games_to_predict
    ]

    result = (
        pd.DataFrame(my_recs, columns=['iid', 'predictions'])
        .sort_values('predictions', ascending=False)
        .head(10)
    )
    return result


# One editable row per top game, pre-filled with the profile user id and a
# score of 0, which ``recommender`` treats as "not rated".
default_entries = [[game, PROFILE_USER_ID, 0] for game in top_40_games]

# NOTE(review): ``gr.inputs`` is the legacy Gradio input namespace - confirm it
# exists in the installed Gradio version.
iface = gr.Interface(
    recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game', 'name', 'score'],
            default=default_entries,
        ),
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
)

iface.launch(debug=True)