mindwrapped committed on
Commit
ef7759a
·
1 Parent(s): baa7d44

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -0
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ from tensorflow import keras
6
+ from pathlib import Path
7
+ import matplotlib.pyplot as plt
8
+ import gradio as gr
9
+ from huggingface_hub import from_pretrained_keras
10
+
11
# Download the MovieLens "latest small" archive from
# http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
# Use the ratings.csv file
movielens_data_file_url = (
    "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
)
# extract=False: the archive is unpacked explicitly below, so the value
# returned here is used as the path of the zip file itself.
movielens_zipped_file = keras.utils.get_file(
    "ml-latest-small.zip", movielens_data_file_url, extract=False
)
keras_datasets_path = Path(movielens_zipped_file).parent
movielens_dir = keras_datasets_path / "ml-latest-small"

# Only extract the data the first time the script is run.
if not movielens_dir.exists():
    # Bound to `archive` rather than `zip` so the builtin zip() stays usable
    # in the rest of the module.
    with ZipFile(movielens_zipped_file, "r") as archive:
        print("Extracting all the files now...")
        archive.extractall(path=keras_datasets_path)
        print("Done!")
29
+
30
ratings_file = movielens_dir / "ratings.csv"
df = pd.read_csv(ratings_file)

# Lookup tables between the raw ids found in ratings.csv and contiguous
# indices; an id's index is its position of first appearance in the file.
user_ids = df["userId"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = dict(enumerate(user_ids))
movie_ids = df["movieId"].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = dict(enumerate(movie_ids))
df["user"] = df["userId"].map(user2user_encoded)
df["movie"] = df["movieId"].map(movie2movie_encoded)

num_users = len(user2user_encoded)
num_movies = len(movie_encoded2movie)
df["rating"] = df["rating"].astype(np.float32)
# Rating bounds, computed with the vectorised Series reductions instead of
# iterating the column with the builtin min()/max(). Nothing else in this file
# reads them; presumably kept from the training script, where they normalise
# the ratings -- confirm before removing.
# NOTE: Series.min()/max() skip NaN, the builtins do not; this assumes the
# rating column has no missing values.
min_rating = float(df["rating"].min())
max_rating = float(df["rating"].max())

# Load the pretrained recommender from the Hugging Face Hub and the movie
# catalogue that ships in the same MovieLens archive.
model = from_pretrained_keras('mindwrapped/collaborative-filtering-movielens')
movie_df = pd.read_csv(movielens_dir / "movies.csv")
53
+
54
+
55
def update_user(id):
    """Return the two tables shown for a user.

    Args:
        id: Encoded user index, forwarded unchanged to both helpers.

    Returns:
        A ``(top_rated, recommendations)`` tuple: the result of
        ``get_top_rated_from_user(id)`` followed by the result of
        ``get_recommendations(id)``.
    """
    top_rated = get_top_rated_from_user(id)
    recommendations = get_recommendations(id)
    return top_rated, recommendations
57
+
58
+
59
def get_top_rated_from_user(id):
    """Return the five movies a user rated highest, best rating first.

    Args:
        id: Encoded user index, i.e. a key of ``userencoded2user``.

    Returns:
        A DataFrame with the columns of ``movie_df`` except ``movieId``, one
        row per movie, ordered by the user's rating in descending order.
        The frame is empty when ``id`` is not a known index.
    """
    decoded_id = userencoded2user.get(id)
    movies_watched_by_user = df[df.userId == decoded_id]

    # Keep the ids as a one-column frame so the rating order survives the join.
    top_movies_user = (
        movies_watched_by_user.sort_values(by="rating", ascending=False)
        .head(5)[["movieId"]]
    )
    # An inner merge preserves the order of the left keys, whereas filtering
    # movie_df with isin() would return the rows in catalogue order and lose
    # the ranking.
    movie_df_rows = top_movies_user.merge(movie_df, on="movieId", how="inner")
    return movie_df_rows.drop("movieId", axis=1)
72
+
73
+
74
def random_user():
    """Pick a random encoded user index and return that user's tables.

    Returns:
        Whatever ``update_user`` returns for an index drawn uniformly from
        ``0 .. num_users - 1``.
    """
    # With a single argument, randint draws from [0, num_users).
    user_index = np.random.randint(num_users)
    return update_user(user_index)
76
+
77
+
78
def get_recommendations(id):
    """Return the ten unseen movies with the highest predicted rating.

    Args:
        id: Encoded user index, i.e. a key of ``userencoded2user``. It is
            also the user value fed to the model.

    Returns:
        A DataFrame with the columns of ``movie_df`` except ``movieId``,
        ordered by predicted rating in descending order (at most ten rows).
        The frame is empty when ``id`` is not a known index or when the user
        has no unseen movie the model knows about.
    """
    no_recommendations = movie_df.iloc[0:0].drop("movieId", axis=1)

    decoded_id = userencoded2user.get(id)
    if decoded_id is None:
        # Unknown index: do not query the model with a user it has no
        # entry for.
        return no_recommendations

    watched_ids = set(df.loc[df.userId == decoded_id, "movieId"])

    # Candidates are catalogue movies the user has not rated and for which an
    # encoding exists (i.e. movies that appear in ratings.csv).
    candidate_ids = [
        movie_id
        for movie_id in movie_df["movieId"].unique().tolist()
        if movie_id not in watched_ids and movie_id in movie2movie_encoded
    ]
    if not candidate_ids:
        return no_recommendations

    # Create data [[user_index, movie_index], ...]
    encoded_movies = np.array([movie2movie_encoded[m] for m in candidate_ids])
    user_movie_array = np.column_stack(
        (np.full(len(candidate_ids), id), encoded_movies)
    )

    # Predict ratings for movies not watched
    ratings = model.predict(user_movie_array).flatten()

    # Indices of the ten highest predictions, best first
    top_ratings_indices = ratings.argsort()[-10:][::-1]

    recommended = pd.DataFrame(
        {"movieId": [candidate_ids[i] for i in top_ratings_indices]}
    )
    # An inner merge preserves the order of the left keys, so the rows come
    # back ranked by predicted rating; filtering movie_df with isin() would
    # return them in catalogue order instead.
    recommended_movies = recommended.merge(movie_df, on="movieId", how="inner")
    return recommended_movies.drop("movieId", axis=1)
113
+
114
# Gradio UI: a slider selects an encoded user index; every change refreshes
# the user's top-rated movies (df1) and the model's recommendations (df2).
# NOTE(review): the nesting below was reconstructed from the flattened diff
# view -- confirm it against the repository copy of app.py.
demo = gr.Blocks()

with demo:
    with gr.Box():
        gr.Markdown(
            """
            ## Input
            #### Select a user to get recommendations for.
            """)

        # Encoded user indices run from 0 to num_users - 1, so the slider
        # must stop at num_users - 1; step=1 keeps the value on whole indices.
        inp1 = gr.Slider(0, num_users - 1, value=0, step=1, label='User')
        # TODO: add a 'Random User' button wired to random_user().

        gr.Markdown(
            """
            #### Movies with the Highest Ratings from this user
            """)
        df1 = gr.DataFrame(interactive=False)

    with gr.Box():
        gr.Markdown('## Output')
        gr.Markdown(
            """
            #### Top 10 movie recommendations
            """)
        df2 = gr.DataFrame(interactive=False)

    gr.HTML("""
    <p style='text-align: center; color: #C7C7C7'>
        <a href='https://keras.io/examples/structured_data/collaborative_filtering_movielens/' target='_blank' style='text-decoration: underline'>Keras Example by Siddhartha Banerjee</a>
        <br>
        Space by Scott Krstyen (mindwrapped)
    </p>
    """)

    inp1.change(fn=update_user,
                inputs=inp1,
                outputs=[df1, df2])


demo.launch()