File size: 3,842 Bytes
3fbcf3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- coding: utf-8 -*-
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
"""

!pip install surprise

from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
from surprise.model_selection import cross_validate
from surprise import Reader, Dataset

!pip install gradio

import gradio as gr

import pandas as pd
import numpy as np
import scipy as sp
from scipy import sparse
from datetime import datetime

# Load the critic reviews, skipping malformed CSV lines, and discard every
# row that has a missing value in any column.
df = pd.read_csv(
    "/content/metacritic_critic_reviews.csv",
    encoding="utf-8",
    on_bad_lines='skip',
).dropna()
# Notebook-style preview of the first rows; the result is not stored.
df.head()

#Create the year column by parsing each date string into a datetime object and keeping only the year
def add_year(full_date):
  """Return the year of a date string formatted like 'Jan 05, 2020'.

  The string is parsed with the '%b %d, %Y' pattern (abbreviated month name,
  day, four-digit year); ``datetime.strptime`` raises ``ValueError`` when the
  text does not match that pattern.
  """
  return datetime.strptime(full_date, '%b %d, %Y').year

# Derive the review year from each textual date, one element at a time.
df['year'] = df['date'].map(add_year)
#Add the year in brackets to the name of the game to avoid confusion 
def year_game(row):
  """Return the row's game title followed by its year in parentheses.

  ``row`` is any mapping-like object exposing 'game' and 'year' keys (for
  example a DataFrame row); both values are converted with ``str`` and
  combined as '<game> (<year>)'.
  """
  return f"{row['game']!s} ({row['year']!s})"

# Make titles unambiguous by appending the year, e.g. "Title (2018)".
df['game'] = df.apply(year_game, axis=1)

# Restrict the data set to a single platform. Candidate platform values:
# ['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4', 'Xbox One', 'Switch']
df = df.loc[df['platform'] == 'PlayStation 4']

# Reviews dated 2015 or later.
over_2015 = df.loc[df['year'] >= 2015]

# Mean score per game, best first.
top_recent_scorers = (
    over_2015
    .groupby('game')['score']
    .mean()
    .sort_values(ascending=False)
)
# Keep the titles of the 40 best-scoring games.
top_40_games = top_recent_scorers.head(40).index
#top_100_games = top_recent_scorers.index[:100]

# Notebook-style shape check; the result is not stored.
df.shape

# The three columns the recommender trains on: game title, reviewer name, score.
combined_games_data = df.loc[:, ['game', 'name', 'score']]
algorithms = [
    factory()
    for factory in (NMF, SVD, SVDpp, KNNWithZScore, CoClustering)
]

# Algorithm names offered by the UI, mapped to Surprise algorithm classes.
# Classes (not instances) are stored so each request trains a fresh model.
_ALGORITHM_CHOICES = {
  'NMF': NMF,
  'SVD': SVD,
  'SVDpp': SVDpp,
  'KNN': KNNWithZScore,
  'CoClustering': CoClustering,
}

# User id under which the submitted profile rows are looked up.
_PROFILE_USER_ID = 1001


def _resolve_algorithm(user_algo):
  """Turn an algorithm name or instance into a Surprise algorithm instance."""
  if user_algo is None:
    # Nothing selected: use the same fallback as for unknown names.
    return NMF()
  if isinstance(user_algo, str):
    # Unrecognised names fall back to NMF.
    return _ALGORITHM_CHOICES.get(user_algo, NMF)()
  # Anything else is taken to be an already constructed algorithm object.
  return user_algo


def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data):
  """Recommend the ten games with the highest predicted score for the user.

  The user's own ratings are appended to the critic reviews, a Surprise model
  is trained on the combined data, and a score is predicted for every game
  the user has not rated.

  Args:
    user_prof: DataFrame with the columns game, name and score (in that
      order). Rows whose score is 0 are treated as "not rated" and ignored.
      The rows are expected to carry the user id 1001 in the name column.
    user_algo: Either one of the names 'NMF', 'SVD', 'SVDpp', 'KNN' or
      'CoClustering', or an algorithm instance exposing ``fit`` and
      ``predict``. ``None`` and unknown names select NMF. Previously this
      argument was ignored and NMF was always used.
    combined_games_data: DataFrame of existing reviews with the columns
      game, name and score (in that order). It is not modified.

  Returns:
    DataFrame with the columns 'iid' (game title) and 'predictions'
    (estimated score), sorted from highest to lowest prediction and limited
    to ten rows.
  """
  # A score of 0 marks a game the user left unrated.
  my_ratings = user_prof[user_prof['score'] != 0]

  # pd.concat builds a new frame, so renaming its columns leaves the
  # caller's data untouched.
  ratings = pd.concat([combined_games_data, my_ratings], axis=0)
  ratings.columns = ['itemID', 'userID', 'rating']

  # Count the reviews each game received and keep only games that have at
  # least as many reviews as the number of ratings the user supplied.
  ratings['reviews'] = ratings.groupby('itemID')['rating'].transform('count')
  enough_reviews = ratings['reviews'] >= len(my_ratings)
  ratings = ratings.loc[enough_reviews, ['userID', 'itemID', 'rating']]

  reader = Reader(rating_scale=(1.0, 100.0))
  data = Dataset.load_from_df(ratings, reader)

  # Predict only for games the user has not rated yet.
  already_rated = ratings.loc[ratings['userID'] == _PROFILE_USER_ID, 'itemID']
  games_to_predict = np.setdiff1d(ratings['itemID'].unique(), already_rated)

  algo = _resolve_algorithm(user_algo)
  algo.fit(data.build_full_trainset())

  my_recs = [
    (iid, algo.predict(uid=_PROFILE_USER_ID, iid=iid).est)
    for iid in games_to_predict
  ]

  result = (
    pd.DataFrame(my_recs, columns=['iid', 'predictions'])
    .sort_values('predictions', ascending=False)
    .head(10)
  )
  return result

# One pre-filled row per top game: [game title, user id 1001, score 0].
default_entries = [[title, 1001, 0] for title in top_40_games]

# Web form: an editable ratings table plus an algorithm picker; the
# recommender's result is rendered as a table.
iface = gr.Interface(
    recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game', 'name', 'score'],
            default=default_entries,
        ),
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
)
iface.launch(debug=True)