|
import streamlit as st
|
|
import pandas as pd
|
|
import altair as alt
|
|
from recommender import Recommender
|
|
from sklearn.decomposition import PCA
|
|
from sklearn.manifold import TSNE
|
|
from os import cpu_count
|
|
import numpy as np
|
|
import time
|
|
|
|
from utils import load_and_preprocess_data
|
|
|
|
import matplotlib.pyplot as plt
|
|
from typing import Union, List, Dict, Any
|
|
import plotly.graph_objects as go
|
|
|
|
|
|
# Columns dropped from the data frames before they are shown with
# ``st.dataframe``. Kept as a list because callers concatenate to it.
COLUMN_NOT_DISPLAY = [
    "StockCode", "UnitPrice", "Country",
    "CustomerIndex", "ProductIndex",
]
|
|
|
|
|
|
# Markdown rendered in the sidebar by ``st.sidebar.markdown``. The literal
# starts with a newline for readability; ``lstrip()`` removes it so the text
# begins directly with the top-level heading.
SIDEBAR_DESCRIPTION = """
# Recommender system

## What is it?
A recommender system is a tool that suggests something new to a particular
user that she/he might be interested in. It becomes useful when
the number of items a user can choose from is high.

## How does it work?
A recommender system internally finds similar users and similar items,
based on a suitable definition of "similarity".
For example, users that purchased the same items can be considered similar.
When we want to suggest new items to a user, a recommender system exploits
the items bought by similar users as a starting point for the suggestion.
The items bought by similar users are compared to the items that the user
already bought. If they are new and similar, the model suggests them.

## How we prepare the data
For each user, we compute the quantity purchased for every single item.
This will be the value considered by the model to compute
the similarity. The item that a user has never bought will
be left at zero. These zeros will be the subject of the recommendation.
""".lstrip()
|
|
|
|
|
|
@st.cache(allow_output_mutation=True)
def create_and_fit_recommender(
    model_name: str,
    values: Union[pd.DataFrame, "np.ndarray"],
    users: Union[pd.DataFrame, "np.ndarray"],
    products: Union[pd.DataFrame, "np.ndarray"],
) -> Recommender:
    """Construct a ``Recommender`` and fit the requested model on it.

    The function is wrapped in ``st.cache(allow_output_mutation=True)``,
    presumably so Streamlit reuses the fitted object between script reruns
    instead of training again.

    Args:
        model_name: Name of the model, forwarded to
            ``Recommender.create_and_fit``.
        values: First positional argument of ``Recommender``.
        users: Second positional argument of ``Recommender``.
        products: Third positional argument of ``Recommender``.

    Returns:
        The ``Recommender`` instance after ``create_and_fit`` has been called.
    """
    # Fixed hyper-parameters handed to the model through ``model_params``.
    hyperparameters = {
        "factors": 190,
        "alpha": 0.6,
        "regularization": 0.06,
        "random_state": 42,
    }

    fitted = Recommender(values, users, products)
    fitted.create_and_fit(model_name, model_params=hyperparameters)
    return fitted
|
|
|
|
|
|
def explain_recommendation(
    recommender: Recommender,
    user_id: int,
    suggestions: List[int],
    df: pd.DataFrame,
):
    """Write, inside a Streamlit expander, why each suggestion was made.

    For every suggested product the function asks
    ``recommender.explain_recommendation`` for the contributing items, looks
    up the descriptions in ``df`` and renders one Markdown section per
    suggestion: a sentence naming the suggested product followed by a bullet
    list of the contributing products.

    Args:
        recommender: Fitted recommender exposing ``explain_recommendation``.
        user_id: Index of the user the suggestions were computed for.
        suggestions: Product indices suggested to the user.
        df: Data frame with at least the ``ProductIndex`` and ``Description``
            columns.
    """
    # One description per product index (the first occurrence in ``df``).
    # This does not depend on the suggestion, so compute it once.
    catalogue = df[["Description", "ProductIndex"]].drop_duplicates(
        subset=["ProductIndex"]
    )

    n_recommended = len(suggestions)
    sections = []
    for suggestion in suggestions:
        explained = recommender.explain_recommendation(
            user_id, suggestion, n_recommended
        )
        # Each explained entry is indexed with [0] to get the product index
        # (presumably (item, score) pairs - confirm against Recommender).
        contributing_ids = [item_and_score[0] for item_and_score in explained]

        suggested = catalogue.loc[
            catalogue["ProductIndex"] == suggestion, "Description"
        ]
        # Fall back to the raw product index instead of raising IndexError
        # when the suggestion has no row in ``df``.
        if suggested.empty:
            suggested_description = str(suggestion)
        else:
            suggested_description = suggested.iloc[0]

        similar_items_description = catalogue.loc[
            catalogue["ProductIndex"].isin(contributing_ids), "Description"
        ].unique()

        lines = [
            f"The item **{suggested_description.strip()}** "
            "has been suggested because it is similar to the following products"
            " bought by the user:"
        ]
        lines.extend(
            f"- {description.strip()}" for description in similar_items_description
        )
        sections.append("\n".join(lines))

    with st.expander("See why the model recommended these products"):
        # A blank line between sections is required: without it Markdown
        # treats the next "The item ..." sentence as a continuation of the
        # previous section's last bullet.
        st.write("\n\n".join(sections))

    st.write("------")
|
|
|
|
|
|
def print_suggestions(suggestions: List[int], df: pd.DataFrame):
    """Write the descriptions of the suggested products as a Markdown list.

    Args:
        suggestions: Product indices to display.
        df: Data frame with the ``ProductIndex`` and ``Description`` columns.
    """
    is_suggested = df["ProductIndex"].isin(suggestions)
    one_row_per_product = df.loc[
        is_suggested, ["Description", "ProductIndex"]
    ].drop_duplicates(subset=["ProductIndex"])
    descriptions = one_row_per_product["Description"].unique()

    lines = ["The model suggests the following products:"]
    lines.extend(f"- {text.strip()}" for text in descriptions)

    st.write("\n".join(lines))
|
|
|
|
|
|
def display_user_char(user: int, data: pd.DataFrame):
    """Show a user's purchase summary followed by the purchase history.

    Args:
        user: Value matched against the ``CustomerIndex`` column.
        data: Data frame holding the purchases; it must contain the
            ``CustomerIndex``, ``Description``, ``InvoiceDate`` and
            ``CustomerID`` columns plus those in ``COLUMN_NOT_DISPLAY``.
    """
    purchases = data[data.CustomerIndex == user]
    distinct_products = purchases["Description"].nunique()

    st.write(
        f"The user {user} bought {distinct_products} distinct products. "
        "Here is the purchase history: "
    )

    # Hide the technical columns (and the raw customer id) from the table.
    hidden_columns = COLUMN_NOT_DISPLAY + ["CustomerID"]
    history = purchases.sort_values("InvoiceDate").drop(hidden_columns, axis=1)
    st.dataframe(history)

    st.write("-----")
|
|
|
|
|
|
def _extract_description(df, products):
|
|
desc = df[df["ProductIndex"].isin(products)].drop_duplicates(
|
|
"ProductIndex", ignore_index=True
|
|
)[["ProductIndex", "Description"]]
|
|
return desc.set_index("ProductIndex")
|
|
|
|
|
|
def display_recommendation_plots(
    user_id: int,
    suggestions: List[int],
    df: pd.DataFrame,
    model: Recommender,
):
    """Plot a t-SNE of the suggested items, together with related purchases.

    Three groups of products are embedded in the same 2-D space and drawn as
    separate scatter traces:

    * the suggested items;
    * the items returned by ``model.explain_recommendation`` for each
      suggestion ("Similar bought by user");
    * the items with a non-zero entry in ``model.user_product_matrix`` for
      the users returned by ``model.similar_users``.

    Args:
        user_id: Index of the user the suggestions were computed for.
        suggestions: Product indices suggested to the user.
        df: Data frame with the ``ProductIndex`` and ``Description`` columns,
            used for the hover labels.
        model: Fitted recommender exposing ``explain_recommendation``,
            ``similar_users``, ``user_product_matrix`` and ``item_factors``.

    Returns:
        A ``plotly.graph_objects.Figure`` with the three traces.
    """
    contributions = []
    n_recommended = len(suggestions)
    for suggestion in suggestions:
        items_and_score = model.explain_recommendation(
            user_id, suggestion, n_recommended
        )
        contributions.append([entry[0] for entry in items_and_score])

    contributions = np.unique(np.concatenate(contributions))

    print("Contribution computed")
    print(contributions)
    print("=" * 80)

    bought_by_similar_users = []
    sim_users, _ = model.similar_users(user_id)
    for u in sim_users:
        # The second array returned by ``nonzero()`` is used as the product
        # indices of the non-zero entries in this user's row.
        _, sim_purchases = model.user_product_matrix[u].nonzero()
        bought_by_similar_users.append(sim_purchases)

    bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))

    print("Similar bought computed")
    print(bought_by_similar_users)
    print("=" * 80)

    # Stack the three groups in a fixed order so the rows of the embedding
    # can be split back into groups by position afterwards.
    to_decompose = np.concatenate(
        (
            model.item_factors[suggestions],
            model.item_factors[contributions],
            model.item_factors[bought_by_similar_users],
        )
    )

    print(f"Shape to decompose: {to_decompose.shape}")

    with st.spinner("Computing plots (this might take around 60 seconds)..."):
        started = time.perf_counter()
        decomposed = _tsne_decomposition(
            to_decompose,
            dict(
                perplexity=30,
                metric="euclidean",
                n_iter=1_000,
                random_state=42,
            ),
        )
        elapsed = time.perf_counter() - started

    print(f"TSNE computed in {elapsed}")
    print("=" * 80)

    # Split by explicit offsets. A negative slice such as
    # ``decomposed[-len(group):]`` would return the whole array when the
    # last group is empty.
    n_suggestions = len(suggestions)
    n_contributions = len(contributions)
    suggestion_dec = decomposed[:n_suggestions, :]
    contribution_dec = decomposed[n_suggestions : n_suggestions + n_contributions, :]
    items_others_dec = decomposed[n_suggestions + n_contributions :, :]

    contribution_description = _extract_description(df, contributions)
    items_other_description = _extract_description(df, bought_by_similar_users)
    suggestion_description = _extract_description(df, suggestions)

    # Selecting the column by label keeps the hover texts one-dimensional
    # even for a single product (``.values.squeeze()`` collapses that case
    # to a 0-d array).
    contribution_labels = contribution_description.loc[
        contributions, "Description"
    ].to_numpy()
    items_other_labels = items_other_description.loc[
        bought_by_similar_users, "Description"
    ].to_numpy()
    suggestion_labels = suggestion_description.loc[
        suggestions, "Description"
    ].to_numpy()

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=contribution_dec[:, 0],
            y=contribution_dec[:, 1],
            mode="markers",
            opacity=0.8,
            name="Similar bought by user",
            marker_symbol="square-open",
            marker_color="#010CFA",
            marker_size=10,
            hovertext=contribution_labels,
        )
    )

    fig.add_trace(
        go.Scatter(
            x=items_others_dec[:, 0],
            y=items_others_dec[:, 1],
            mode="markers",
            name="Product bought by similar users",
            opacity=0.7,
            marker_symbol="circle-open",
            marker_color="#FA5F19",
            marker_size=10,
            hovertext=items_other_labels,
        )
    )

    fig.add_trace(
        go.Scatter(
            x=suggestion_dec[:, 0],
            y=suggestion_dec[:, 1],
            mode="markers",
            name="Suggested",
            marker_color="#1A9626",
            marker_symbol="star",
            marker_size=10,
            hovertext=suggestion_labels,
        )
    )

    fig.update_xaxes(visible=False)
    fig.update_yaxes(visible=False)
    fig.update_layout(plot_bgcolor="white")

    return fig
|
|
|
|
|
|
def _tsne_decomposition(data: np.ndarray, tsne_args: Dict[str, Any]):
    """Embed ``data`` in two dimensions with t-SNE.

    When ``data`` has more than 50 columns it is first reduced with PCA.

    Args:
        data: 2-D array with one row per sample.
        tsne_args: Extra keyword arguments forwarded to ``TSNE``. The mapping
            is not modified.

    Returns:
        The result of ``TSNE(...).fit_transform`` on the (possibly reduced)
        data.
    """
    n_samples, n_features = data.shape

    if n_features > 50:
        print("Performing PCA...")
        # PCA cannot extract more components than there are samples, so cap
        # the target dimension for small inputs.
        data = PCA(n_components=min(50, n_samples)).fit_transform(data)

    # t-SNE requires the perplexity to be smaller than the number of
    # samples. Clamp it only when the requested value would be rejected, so
    # valid calls are passed through untouched.
    tsne_kwargs = dict(tsne_args)
    requested_perplexity = tsne_kwargs.get("perplexity", 30.0)  # TSNE default
    if n_samples > 1 and requested_perplexity >= n_samples:
        tsne_kwargs["perplexity"] = n_samples - 1

    return TSNE(
        n_components=2,
        n_jobs=cpu_count(),
        **tsne_kwargs,
    ).fit_transform(data)
|
|
|
|
|
|
def main():
    """Render the Streamlit page.

    Loads the data, fits (or fetches from the cache) the recommender, shows
    the dataset and the sidebar text, then renders a form where a customer
    and the number of items can be chosen. Once the form is submitted the
    purchase history, the suggestions, their explanation and the t-SNE plot
    are displayed.
    """
    data, users, products = load_and_preprocess_data()
    recommender = create_and_fit_recommender(
        "als",
        data["Quantity"],
        users,
        products,
    )

    st.markdown(
        "# Recommender system\n"
        "The dataset used for these computations is the following:\n"
    )
    st.sidebar.markdown(SIDEBAR_DESCRIPTION)

    to_display = data.drop(
        COLUMN_NOT_DISPLAY,
        axis=1,
    )
    # The price is cast to integers before the table is shown.
    to_display["Price"] = to_display["Price"].astype(int)

    st.dataframe(
        to_display,
    )

    st.markdown("## Interactive suggestion")
    with st.form("recommend"):
        user = st.selectbox(
            "Select a customer to get his recommendations",
            users.unique(),
        )

        items_to_recommend = st.slider("How many items to recommend?", 1, 10, 5)
        print(items_to_recommend)

        submitted = st.form_submit_button("Recommend!")
        if submitted:
            display_user_char(user, data)
            suggestions_and_score = recommender.recommend_products(
                user, items_to_recommend
            )
            # Only the first element (the suggested product indices) is
            # needed by the display helpers below.
            suggestions = suggestions_and_score[0]

            print_suggestions(suggestions, data)
            explain_recommendation(recommender, user, suggestions, data)

            st.markdown(
                "## How the purchases of similar users influence the recommendation"
            )
            fig = display_recommendation_plots(user, suggestions, data, recommender)
            st.plotly_chart(fig)
|
|
|
|
|
|
# Module-level entry point: the page is rendered every time this file is
# executed (note that importing the module also triggers it).
main()
|
|
|