Spaces:
Runtime error
Runtime error
change cluster vertical bar
Browse files- pages/clustering.py +7 -6
pages/clustering.py
CHANGED
@@ -22,14 +22,14 @@ To cluster a client, we adopt the RFM metrics. They stand for:
|
|
22 |
|
23 |
Given these 3 metrics, we can cluster the customers and find a suitable
|
24 |
"definition" based on the clusters they belong to. Since the dataset
|
25 |
-
we're using right now
|
26 |
3 clusters for each metric.
|
27 |
|
28 |
## How we compute the clusters
|
29 |
|
30 |
-
We resort to a
|
31 |
-
|
32 |
-
|
33 |
""".lstrip()
|
34 |
|
35 |
FREQUENCY_CLUSTERS_EXPLAIN = """
|
@@ -238,7 +238,8 @@ def plot_rfm_distribution(df_rfm: pd.DataFrame, cluster_info: Dict[str, List[int
|
|
238 |
# Get the max value in the cluster info. The cluster info is a list of min - max
|
239 |
# values per cluster.
|
240 |
values = cluster_info[f"{x}_cluster"]
|
241 |
-
|
|
|
242 |
fig.add_vline(
|
243 |
x=values[i],
|
244 |
annotation_text=f"End of cluster {n_cluster+1}",
|
@@ -299,7 +300,7 @@ def display_dataframe_heatmap(df_rfm: pd.DataFrame):
|
|
299 |
# and then display it.
|
300 |
st.markdown("## Heatmap: how the client are distributed between clusters")
|
301 |
st.write(
|
302 |
-
count.style.format(thousands=" ", precision=0, na_rep="
|
303 |
.set_table_styles([cell_hover, index_names, headers])
|
304 |
.background_gradient(cmap="coolwarm")
|
305 |
.to_html(),
|
|
|
22 |
|
23 |
Given these 3 metrics, we can cluster the customers and find a suitable
|
24 |
"definition" based on the clusters they belong to. Since the dataset
|
25 |
+
we're using right now has about 5000 distinct customers, we identify
|
26 |
3 clusters for each metric.
|
27 |
|
28 |
## How we compute the clusters
|
29 |
|
30 |
+
We resort to a GaussianMixture algorithm. We can think of GaussianMixture
|
31 |
+
as generalized k-means clustering that incorporates information about
|
32 |
+
the covariance structure of the data as well as the centers of the clusters.
|
33 |
""".lstrip()
|
34 |
|
35 |
FREQUENCY_CLUSTERS_EXPLAIN = """
|
|
|
238 |
# Get the max value in the cluster info. The cluster info is a list of min - max
|
239 |
# values per cluster.
|
240 |
values = cluster_info[f"{x}_cluster"]
|
241 |
+
# Add vertical bar on each cluster end. But skip the last cluster.
|
242 |
+
for n_cluster, i in enumerate(range(1, len(values)-1, 2)):
|
243 |
fig.add_vline(
|
244 |
x=values[i],
|
245 |
annotation_text=f"End of cluster {n_cluster+1}",
|
|
|
300 |
# and then display it.
|
301 |
st.markdown("## Heatmap: how the client are distributed between clusters")
|
302 |
st.write(
|
303 |
+
count.style.format(thousands=" ", precision=0, na_rep="0")
|
304 |
.set_table_styles([cell_hover, index_names, headers])
|
305 |
.background_gradient(cmap="coolwarm")
|
306 |
.to_html(),
|