simran0608 committed on
Commit
346c8ea
1 Parent(s): 86c4e37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py CHANGED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import numpy as np
5
+ import pickle
6
+ import matplotlib.pyplot as plt
7
+ from src.data_preparation import preprocess_data
8
+ from src.clustering import perform_clustering, plot_clusters
9
+ from src.feature_selection import select_features_pca, select_features_rfe, select_features_rf
10
+ import os
11
+ from sklearn.preprocessing import StandardScaler
12
+
13
def load_data(dataset_choice):
    """Load the raw dataset that corresponds to a menu selection.

    Args:
        dataset_choice: Name of the dataset to load; one of "Insurance",
            "Retail" or "Banking".

    Returns:
        The object returned by the matching pandas reader
        (``pd.read_sas`` for "Insurance", ``pd.read_csv`` otherwise).
        File paths are relative to the current working directory.

    Raises:
        ValueError: If ``dataset_choice`` is not one of the supported names.
    """
    if dataset_choice == "Insurance":
        return pd.read_sas('a2z_insurance.sas7bdat', encoding='latin1')
    if dataset_choice == "Retail":
        return pd.read_csv('retaildata.csv', encoding='latin1')
    if dataset_choice == "Banking":
        return pd.read_csv('bankingdata.csv', encoding='latin1')

    # An unsupported name used to fall through to `return data` with `data`
    # never assigned (UnboundLocalError); fail with an explicit message instead.
    raise ValueError(
        f"Unsupported dataset choice: {dataset_choice!r}. "
        "Expected 'Insurance', 'Retail' or 'Banking'."
    )
23
+ # Function to summarize cluster characteristics
24
def summarize_cluster_characteristics(clustered_data, labels, cluster_number):
    """Return the per-column mean of the rows assigned to one cluster.

    Args:
        clustered_data: DataFrame holding one row per clustered sample.
        labels: Cluster label for each row; compared element-wise against
            ``cluster_number`` to build a boolean row mask.
        cluster_number: Label of the cluster to summarize.

    Returns:
        dict mapping each numeric column name to its mean within the cluster.
    """
    cluster_rows = clustered_data[labels == cluster_number]
    # numeric_only=True: on pandas >= 2 a plain mean() raises TypeError as soon
    # as a non-numeric column is present, and callers format every value as a
    # float, so only numeric columns are meaningful here.
    return cluster_rows.mean(numeric_only=True).to_dict()
28
+
29
+ # Function to display Business Understanding section
30
def display_business_understanding():
    """Render the "Business Objective" page.

    Writes a subheader, a markdown description of why customer segmentation
    matters, and an illustration loaded from ``Customer-Segmentation.png``.
    """
    objective_markdown = """
    ###### Customer segmentation is a fundamental task in marketing and customer relationship management. With the advancements in data analytics and machine learning, it is now possible to group customers into distinct segments with a high degree of precision, allowing businesses to tailor their marketing strategies and offerings to each segment's unique needs and preferences.

    ###### Through this customer segmentation, businesses can achieve:
    - **Personalization**: Tailoring marketing strategies to meet the unique needs of each segment.
    - **Optimization**: Efficient allocation of marketing resources.
    - **Insight**: Gaining a deeper understanding of the customer base.
    - **Engagement**: Enhancing customer engagement and satisfaction.

    ###### => Problem/Requirement: Utilize machine learning and data analysis techniques in Python to perform customer segmentation.

    """
    st.subheader("Business Objective")
    st.write(objective_markdown)
    st.image(
        "Customer-Segmentation.png",
        caption="Customer Segmentation",
        use_column_width=True,
    )
45
+
46
+ # Function to display Dataset section
47
def display_dataset_selection():
    """Let the user pick a dataset and show a short overview of it.

    Displays the chosen dataset name, its row and column counts and the
    first five rows.

    Returns:
        The loaded dataset, as returned by ``load_data``.
    """
    dataset_options = ("Insurance", "Retail", "Banking")
    dataset_choice = st.selectbox("Select Dataset", dataset_options)
    data = load_data(dataset_choice)

    row_count, column_count = data.shape
    st.write(f"Dataset: {dataset_choice}")
    st.write("Number of rows:", row_count)
    st.write("Number of columns:", column_count)

    st.write("First five rows of the data:")
    preview = data.head()
    st.write(preview)
    return data
56
+ # Function to display Modeling & Evaluation section
57
def _select_features(data, method, n_selected):
    """Run the feature-selection helper that matches the sidebar choice."""
    selectors = {
        'PCA': select_features_pca,
        'RFE': select_features_rfe,
        'Random Forest': select_features_rf,
    }
    return selectors[method](data, n_selected)


def _run_clustering(data, method, n_selected, n_clusters):
    """Select features, cluster, display the results and cache them in session state."""
    selected_data, selected_features = _select_features(data, method, n_selected)

    st.write(f"Selected Features: {selected_features}")
    clustered_data, score, df_value_scaled, labels, model = perform_clustering(selected_data, n_clusters)
    st.write(f"Number of Clusters: {n_clusters}")
    st.write(f"Silhouette Score: {score}")
    st.write("Clustered Data")
    st.write(clustered_data)
    st.write("Cluster Visualization")
    plot_clusters(df_value_scaled, labels)

    # Keep everything the prediction form needs across Streamlit reruns.
    st.session_state.selected_features = selected_features
    st.session_state.model = model
    st.session_state.clustered_data = clustered_data
    st.session_state.labels = labels
    st.session_state.df_value_scaled = df_value_scaled
    # Scaler fitted on the training features so that a single user-supplied row
    # can later be scaled with the training mean/variance.
    # NOTE(review): assumes perform_clustering standardises `selected_data`
    # with a StandardScaler to produce `df_value_scaled` - confirm in
    # src/clustering.py, or have perform_clustering return its own scaler.
    st.session_state.scaler = StandardScaler().fit(pd.DataFrame(selected_data).to_numpy())


def _render_prediction_form():
    """Show the per-feature input form and report the predicted cluster on submit."""
    st.write("### Predict Cluster")

    # A form batches the inputs so the app reruns only when "Predict" is clicked.
    with st.form(key='prediction_form'):
        user_input = {
            feature: st.number_input(f'Enter {feature}', value=0.0)
            for feature in st.session_state.selected_features
        }
        submit_button = st.form_submit_button(label='Predict')

    if not submit_button:
        return

    user_df = pd.DataFrame(user_input, index=[0])

    # Use the scaler fitted on the training data. Fitting a fresh StandardScaler
    # on this single row (as before) subtracts the row from itself, so every
    # input collapsed to an all-zero vector and the same cluster was predicted.
    user_df_scaled = st.session_state.scaler.transform(user_df.to_numpy())

    cluster = st.session_state.model.predict(user_df_scaled)
    st.write(f'The predicted cluster for the input data is: {cluster[0]}')

    clustered_data = st.session_state.clustered_data
    labels = st.session_state.labels
    df_value_scaled = st.session_state.df_value_scaled

    summary = summarize_cluster_characteristics(clustered_data, labels, cluster[0])

    # Build the markdown bullet list describing the predicted cluster.
    header = f"Based on the input features, the customer belongs to Cluster {cluster[0]}, which is characterized by the following average values:\n"
    bullets = "".join(f"- {feature}: {value:.2f}\n" for feature, value in summary.items())
    st.write(header + bullets)

    plot_clusters(df_value_scaled, labels, new_data_point=user_df_scaled)


def display_modeling_evaluation():
    """Render the "Modeling & Prediction" page.

    Loads and preprocesses the chosen dataset, exposes sidebar controls for the
    feature-selection method, the number of features/components and the number
    of clusters, runs the clustering when "Cluster" is pressed, and - once a
    model is stored in the session - offers a form to predict the cluster of a
    new data point.
    """
    dataset_choice = st.selectbox("Select Dataset", ("Insurance", "Retail", "Banking"))
    data = load_data(dataset_choice)
    data = preprocess_data(data)

    # Sidebar for feature selection and clustering method
    st.sidebar.header("Feature Selection and Clustering Method")
    feature_selection_method = st.sidebar.selectbox("Select feature selection method", ('PCA', 'RFE', 'Random Forest'))
    n_clusters = st.sidebar.slider("Number of clusters", min_value=2, max_value=10, value=3)

    # Same numeric range for every method; only the label differs.
    if feature_selection_method == 'PCA':
        n_selected = st.sidebar.slider("Number of PCA components", min_value=2, max_value=10, value=5)
    else:
        n_selected = st.sidebar.slider("Number of features to select", min_value=2, max_value=10, value=5)

    # Perform clustering on button click
    if st.sidebar.button("Cluster"):
        _run_clustering(data, feature_selection_method, n_selected, n_clusters)

    # Predict new data only once a clustering run has populated the session.
    required_keys = ('selected_features', 'model', 'scaler')
    if all(key in st.session_state for key in required_keys):
        _render_prediction_form()
133
+
134
+
135
+ # Main app structure
136
def main():
    """Draw the page title and route the sidebar menu choice to its page renderer."""
    st.title("Customer Segmentation Demo")
    st.header("Customer Segmentation")

    # Menu label -> function that renders the corresponding page.
    pages = {
        "Business Understanding": display_business_understanding,
        "Dataset": display_dataset_selection,
        "Modeling & Prediction": display_modeling_evaluation,
    }
    choice = st.sidebar.selectbox('Menu', list(pages))

    render_page = pages.get(choice)
    if render_page is not None:
        render_page()
152
+
153
# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()