Spaces:
Runtime error
Runtime error
seyia92coding
committed on
Commit
•
3fbcf3c
1
Parent(s):
e2703c5
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
|
8 |
+
"""
|
9 |
+
|
10 |
+
# Dependency: the `surprise` package must already be installed in the runtime
# (e.g. `pip install surprise` or an entry in requirements.txt). The notebook
# shell magic `!pip ...` is not valid Python and fails when this file runs as a script.
|
11 |
+
|
12 |
+
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
|
13 |
+
from surprise.model_selection import cross_validate
|
14 |
+
from surprise import Reader, Dataset
|
15 |
+
|
16 |
+
# Dependency: the `gradio` package must already be installed in the runtime
# (e.g. `pip install gradio` or an entry in requirements.txt). The notebook
# shell magic `!pip ...` is not valid Python and fails when this file runs as a script.
|
17 |
+
|
18 |
+
import gradio as gr
|
19 |
+
|
20 |
+
import pandas as pd
|
21 |
+
import numpy as np
|
22 |
+
import scipy as sp
|
23 |
+
from scipy import sparse
|
24 |
+
from datetime import datetime
|
25 |
+
|
26 |
+
# Load the critic reviews, skipping malformed rows, then drop every row that
# has at least one missing value.
# NOTE(review): "/content/..." looks like a Colab-style absolute path — confirm
# the CSV actually exists at this location in the deployment environment.
df = pd.read_csv("/content/metacritic_critic_reviews.csv", on_bad_lines='skip', encoding="utf-8")
df.dropna(inplace=True)
|
29 |
+
|
30 |
+
# Create a year column by parsing the date string into a datetime object and keeping only the year
|
31 |
+
def add_year(full_date: str) -> int:
    """Return the calendar year of a date string such as ``'Jan 05, 2020'``.

    Args:
        full_date: Date text in ``'%b %d, %Y'`` format (abbreviated month
            name, day of month, comma, four-digit year).

    Returns:
        The year component as an integer.

    Raises:
        ValueError: If ``full_date`` does not match the expected format.
    """
    return datetime.strptime(full_date, '%b %d, %Y').year
|
34 |
+
|
35 |
+
# Derive a 'year' column by applying add_year to each value of the 'date' column
df['year'] = df['date'].apply(add_year)
|
36 |
+
#Add the year in brackets to the name of the game to avoid confusion
|
37 |
+
def year_game(row):
    """Return the game title followed by its year in parentheses.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) exposing the keys
            ``'game'`` and ``'year'``.

    Returns:
        A string of the form ``"<game> (<year>)"``.
    """
    # `!s` forces str() on both values, matching plain string concatenation.
    return f"{row['game']!s} ({row['year']!s})"
|
41 |
+
|
42 |
+
# Replace each title with "<title> (<year>)" so entries sharing a name stay distinguishable.
df['game'] = df.apply(year_game, axis=1)

# Platform labels noted by the original author:
# ['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4', 'Xbox One', 'Switch']
# Only PlayStation 4 rows are kept.
df = df[df['platform'] == 'PlayStation 4']

# Keep rows whose derived year is 2015 or later
over_2015 = df[df['year'] >= 2015]

# Average score per game, sorted from highest to lowest
top_recent_scorers = over_2015.groupby('game')['score'].mean().sort_values(ascending=False)
# Keep only the 40 highest-scoring games
top_40_games = top_recent_scorers.index[:40]

# Ratings table fed to the recommender: one row per (game, name, score)
combined_games_data = df[['game', 'name', 'score']]
|
59 |
+
# One default-configured instance of each candidate Surprise algorithm
algorithms = [NMF(), SVD(), SVDpp(), KNNWithZScore(), CoClustering()]
|
60 |
+
|
61 |
+
def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data):
    """Predict ratings for games user 1001 has not rated and return the top 10.

    Args:
        user_prof: DataFrame of the user's own ratings with the columns
            ``game``, ``name`` and ``score``; rows whose score is 0 are
            treated as "not rated" and ignored.
        user_algo: Either an algorithm label (``'NMF'``, ``'SVD'``,
            ``'SVDpp'``, ``'KNN'``, ``'CoClustering'``) or an already
            constructed Surprise algorithm instance. ``None`` or an unknown
            label falls back to ``NMF``.
        combined_games_data: DataFrame of existing ratings with the columns
            ``game``, ``name`` and ``score``.

    Returns:
        A DataFrame with the columns ``iid`` and ``predictions`` holding the
        ten highest predicted ratings, sorted in descending order.
    """
    # Resolve the requested algorithm. The previous implementation compared a
    # loop index with these labels, so the selection was never honoured and
    # NMF was always used.
    algo_by_name = {
        'NMF': NMF,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'KNN': KNNWithZScore,
        'CoClustering': CoClustering,
    }
    if user_algo is None or isinstance(user_algo, str):
        algo = algo_by_name.get(user_algo, NMF)()
    else:
        algo = user_algo

    # Append the user's non-zero ratings to the existing ratings.
    my_ratings = user_prof[user_prof['score'] != 0]
    combined_games_data = pd.concat([combined_games_data, my_ratings], axis=0)
    combined_games_data.columns = ['itemID', 'userID', 'rating']

    # Count ratings per game (itemID) and keep only the games that have at
    # least as many ratings as the number of games the user rated.
    combined_games_data['reviews'] = combined_games_data.groupby(['itemID'])['rating'].transform('count')
    enough_reviews = combined_games_data.reviews >= my_ratings.shape[0]
    combined_games_data = combined_games_data[enough_reviews][['userID', 'itemID', 'rating']]

    reader = Reader(rating_scale=(1.0, 100.0))
    data = Dataset.load_from_df(combined_games_data, reader)

    # Candidate games: every remaining game minus the ones user 1001 already rated.
    unique_ids = combined_games_data['itemID'].unique()
    iids1001 = combined_games_data.loc[combined_games_data['userID'] == 1001, 'itemID']
    games_to_predict = np.setdiff1d(unique_ids, iids1001)

    algo.fit(data.build_full_trainset())

    my_recs = [(iid, algo.predict(uid=1001, iid=iid).est) for iid in games_to_predict]

    result = (
        pd.DataFrame(my_recs, columns=['iid', 'predictions'])
        .sort_values('predictions', ascending=False)
        .head(10)
    )
    return result
|
103 |
+
|
104 |
+
# Pre-filled rows for the input table: one [game, 1001, 0] entry per top game.
# 1001 is the user id the recommender looks up; a score of 0 means "not rated yet".
default_entries = [[game, 1001, 0] for game in top_40_games]
|
107 |
+
|
108 |
+
# Build the web UI: an editable ratings table plus an algorithm selector as
# inputs, and the recommendations DataFrame as output.
# NOTE(review): `gr.inputs.*` is Gradio's legacy input namespace — confirm the
# installed Gradio version still provides it.
iface = gr.Interface(
    recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game', 'name', 'score'],
            default=default_entries,
        ),
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
)
iface.launch(debug=True)
|