seyia92coding committed on
Commit
3fbcf3c
1 Parent(s): e2703c5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """HS_Surprise Module_Metacritic_Games_Recomm.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
8
+ """
9
+
10
# NOTE: the notebook shell magics `!pip install surprise` and
# `!pip install gradio` are IPython-only syntax and raise SyntaxError in a
# plain .py module. Install both packages before running this script
# (for example by listing them in requirements.txt).

from datetime import datetime

import gradio as gr
import numpy as np
import pandas as pd
import scipy as sp
from scipy import sparse
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
from surprise import Reader, Dataset
from surprise.model_selection import cross_validate
25
+
26
# Load the critic reviews, skipping malformed CSV lines, then discard every
# row that has a missing value.
df = pd.read_csv(
    "/content/metacritic_critic_reviews.csv",
    encoding="utf-8",
    on_bad_lines='skip',
)
df = df.dropna()
# Preview of the first rows (only displays output in a notebook).
df.head()
29
+
30
+ #Create date column by converting the date into a datetime object then returning only the year
31
def add_year(full_date):
    """Return the year of a date string in '%b %d, %Y' form (e.g. 'Jan 5, 2019').

    Raises ValueError (from ``datetime.strptime``) when the string does not
    match that format.
    """
    return datetime.strptime(full_date, '%b %d, %Y').year
34
+
35
# Derive a numeric 'year' column from the textual 'date' column.
df['year'] = df['date'].map(add_year)
36
+ #Add the year in brackets to the name of the game to avoid confusion
37
def year_game(row):
    """Return the row's game title followed by its year in brackets.

    ``row`` is any mapping-like object exposing 'game' and 'year' keys;
    both values are converted with ``str`` before being combined,
    producing e.g. "Some Game (2018)".
    """
    return f"{row['game']!s} ({row['year']!s})"
41
+
42
# Rewrite each title as "<game> (<year>)" using year_game, row by row.
df['game'] = df.apply(year_game, axis=1)

# Keep a single platform only. Candidate values noted by the author:
#['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4','Xbox One', 'Switch']
df = df.loc[df['platform'] == 'PlayStation 4']

# Reviews dated 2015 or later.
over_2015 = df.loc[df['year'] >= 2015]

# Mean score per game among those recent reviews, highest first.
mean_score_by_game = over_2015.groupby('game')['score'].mean()
top_recent_scorers = mean_score_by_game.sort_values(ascending=False)
# Titles of the 40 best-scoring recent games.
top_40_games = top_recent_scorers.index[:40]
#top_100_games = top_recent_scorers.index[:100]

# Shape check (only displays output in a notebook).
df.shape

# The three columns the recommender consumes: game title, reviewer name, score.
combined_games_data = df[['game','name','score']]
algorithms = [NMF(), SVD(), SVDpp(), KNNWithZScore(), CoClustering()]
60
+
61
def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data): # top_100_games=top_100_games,
    """Return the ten games with the highest predicted rating for the profile user.

    Parameters
    ----------
    user_prof : pandas.DataFrame
        The user's own ratings with columns ('game', 'name', 'score'), in that
        order. Rows whose score is 0 are treated as "not rated" and dropped.
        Predictions are made for user id 1001, so the profile rows are
        expected to carry 1001 in the 'name' column.
    user_algo : str or surprise algorithm instance, optional
        Either one of the names 'NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering',
        or an already constructed algorithm object exposing ``fit``.
        Anything else (including None or an unknown name) falls back to NMF.
    combined_games_data : pandas.DataFrame, optional
        Existing ratings with the same three columns as ``user_prof``.

    Returns
    -------
    pandas.DataFrame
        Columns 'iid' (game) and 'predictions' (estimated rating), sorted by
        prediction in descending order and limited to ten rows.
    """
    profile_uid = 1001

    my_ratings = user_prof[user_prof['score'] != 0]
    combined_games_data = pd.concat([combined_games_data, my_ratings], axis=0)
    combined_games_data.columns = ['itemID', 'userID', 'rating']

    # Count the reviews each game received and keep only games that have at
    # least as many reviews as the user supplied ratings.
    combined_games_data['reviews'] = combined_games_data.groupby(['itemID'])['rating'].transform('count')
    enough_reviews = combined_games_data.reviews >= my_ratings.shape[0]
    combined_games_data = combined_games_data[enough_reviews][['userID', 'itemID', 'rating']]

    reader = Reader(rating_scale=(1.0, 100.0))
    data = Dataset.load_from_df(combined_games_data, reader)

    # Candidate games: every remaining game the profile user has not rated.
    unique_ids = combined_games_data['itemID'].unique()
    iids1001 = combined_games_data.loc[combined_games_data['userID'] == profile_uid, 'itemID']
    games_to_predict = np.setdiff1d(unique_ids, iids1001)

    # Resolve the requested algorithm. The previous implementation compared an
    # integer loop index against these names, so it always ended up with NMF
    # and ignored the caller's choice.
    algo_classes = {
        'NMF': NMF,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'KNN': KNNWithZScore,
        'CoClustering': CoClustering,
    }
    if isinstance(user_algo, str):
        algo = algo_classes.get(user_algo, NMF)()
    elif hasattr(user_algo, 'fit'):
        algo = user_algo
    else:
        algo = NMF()

    algo.fit(data.build_full_trainset())

    my_recs = [
        (iid, algo.predict(uid=profile_uid, iid=iid).est)
        for iid in games_to_predict
    ]

    result = (
        pd.DataFrame(my_recs, columns=['iid', 'predictions'])
        .sort_values('predictions', ascending=False)
        .head(10)
    )
    return result
103
+
104
# Pre-fill the editable profile table: one row per top game, attributed to
# user 1001, with score 0 (recommender drops rows whose score is 0).
default_entries = [[title, 1001, 0] for title in top_40_games]

# Web UI: an editable ratings table plus an algorithm selector feed
# recommender; its result is rendered as a dataframe.
iface = gr.Interface(
    fn=recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game','name','score'],
            default=default_entries,
        ),
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
)
iface.launch(debug=True)