File size: 7,695 Bytes
346c8ea
 
 
 
 
 
c41f726
c79584f
3cb6066
346c8ea
c41f726
efb850b
346c8ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1107b8f
128b5df
efb850b
 
 
 
346c8ea
c41f726
346c8ea
c79584f
346c8ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c41f726
 
 
 
 
 
 
 
 
 
 
 
346c8ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c79584f
346c8ea
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import streamlit as st
import pandas as pd
import seaborn as sns
import numpy as np
import pickle
import matplotlib.pyplot as plt
from data_preparation import preprocess_data,data_imp
from clustering import perform_clustering, plot_clusters,summarize_cluster_characteristics
from feature_selection import select_features_pca, select_features_rfe, select_features_rf
from sklearn.preprocessing import StandardScaler

# Unpack the six objects returned by data_imp(): one feature-description object
# and one defaults mapping for each of the Insurance, Banking and Retail datasets.
# NOTE: `bankng_feature_descriptions` is spelled without the "i"; the same
# spelling is used where the name is read later in this file, so rename both
# together if it is ever corrected.
insurance_feature_descriptions,bankng_feature_descriptions,retail_feature_descriptions,insurance_defaults,banking_defaults,retail_defaults=data_imp()

def load_data(dataset_choice):
    """Load the raw dataset that corresponds to ``dataset_choice``.

    Args:
        dataset_choice: One of ``"Insurance"``, ``"Retail"`` or ``"Banking"``.

    Returns:
        The dataset read from its file as a pandas DataFrame.

    Raises:
        ValueError: If ``dataset_choice`` is not one of the supported names.
            (Previously this case fell through to ``return data`` with
            ``data`` unbound and surfaced as an ``UnboundLocalError``.)
    """
    if dataset_choice == "Insurance":
        return pd.read_sas('a2z_insurance.sas7bdat', encoding='latin1')
    if dataset_choice == "Retail":
        return pd.read_csv('retaildata.csv', encoding='latin1')
    if dataset_choice == "Banking":
        return pd.read_csv('bankingdata.csv', encoding='latin1')
    raise ValueError(
        f"Unknown dataset choice: {dataset_choice!r}; "
        "expected 'Insurance', 'Retail' or 'Banking'"
    )

# Function to display Business Understanding section
def display_business_understanding():
    """Show the business objective text followed by the illustration image."""
    st.subheader("Business Objective")
    objective_markdown = """
    ###### Customer segmentation is a fundamental task in marketing and customer relationship management. With the advancements in data analytics and machine learning, it is now possible to group customers into distinct segments with a high degree of precision, allowing businesses to tailor their marketing strategies and offerings to each segment's unique needs and preferences.

    ###### Through this customer segmentation, businesses can achieve:
    - **Personalization**: Tailoring marketing strategies to meet the unique needs of each segment.
    - **Optimization**: Efficient allocation of marketing resources.
    - **Insight**: Gaining a deeper understanding of the customer base.
    - **Engagement**: Enhancing customer engagement and satisfaction.

    ###### => Problem/Requirement: Utilize machine learning and data analysis techniques in Python to perform customer segmentation.
    
    """
    st.write(objective_markdown)
    st.image(
        "Customer-Segmentation.png",
        use_column_width=True,
        caption="Customer Segmentation",
    )

# Function to display Dataset section
def display_dataset_selection():
    """Let the user pick a dataset, show its size, a preview and its feature notes.

    Returns:
        The loaded dataset for the selected choice.
    """
    dataset_options = ("Insurance", "Retail", "Banking")
    dataset_choice = st.selectbox("Select Dataset", dataset_options)
    data = load_data(dataset_choice)

    n_rows, n_cols = data.shape
    st.write(f"Dataset: {dataset_choice}")
    st.write("Number of rows:", n_rows)
    st.write("Number of columns:", n_cols)
    st.write("First five rows of the data:")
    st.write(data.head())

    # Any choice other than Insurance/Retail falls back to the banking notes.
    descriptions_by_dataset = {
        "Insurance": insurance_feature_descriptions,
        "Retail": retail_feature_descriptions,
    }
    st.write(descriptions_by_dataset.get(dataset_choice, bankng_feature_descriptions))
    return data
    
# Function to display Modeling & Evaluation section
def display_modeling_evaluation():
    """Render the modeling page: feature selection, clustering and prediction.

    The page loads and preprocesses the chosen dataset, lets the user pick a
    feature-selection method and cluster count in the sidebar, and runs the
    clustering when the "Cluster" button is pressed. The fitted model, the
    selected features, the clustering outputs, the input scaler and the name
    of the dataset they belong to are kept in ``st.session_state`` so the
    prediction form still works on later reruns.

    Fixes compared with the previous version:
      * The user's row is scaled with a scaler fitted on the training
        features. Fitting a ``StandardScaler`` on the single input row maps
        every feature to 0, so the predicted cluster never depended on the
        values entered.
      * Default form values come from the dataset the stored model was trained
        on, not from whatever the dataset selectbox currently shows.
    """
    dataset_choice = st.selectbox("Select Dataset", ("Insurance", "Retail", "Banking"))
    data = load_data(dataset_choice)
    data = preprocess_data(data)

    # Sidebar for feature selection and clustering method
    st.sidebar.header("Feature Selection and Clustering Method")
    feature_selection_method = st.sidebar.selectbox("Select feature selection method", ('PCA', 'RFE', 'Random Forest'))
    n_clusters = st.sidebar.slider("Number of clusters", min_value=2, max_value=10, value=3)

    if feature_selection_method == 'PCA':
        n_components = st.sidebar.slider("Number of PCA components", min_value=2, max_value=10, value=5)
    elif feature_selection_method in ['RFE', 'Random Forest']:
        n_features_to_select = st.sidebar.slider("Number of features to select", min_value=2, max_value=10, value=5)

    # Perform clustering on button click
    if st.sidebar.button("Cluster"):
        if feature_selection_method == 'PCA':
            selected_data, selected_features = select_features_pca(data, n_components)
        elif feature_selection_method == 'RFE':
            selected_data, selected_features = select_features_rfe(data, n_features_to_select)
        elif feature_selection_method == 'Random Forest':
            selected_data, selected_features = select_features_rf(data, n_features_to_select)

        # Fit the input scaler on the training features *before* clustering, in
        # case perform_clustering modifies selected_data in place.
        # NOTE(review): this assumes perform_clustering standardises
        # selected_data with a StandardScaler before fitting the model; if it
        # scales differently, have it return its own fitted scaler instead.
        input_scaler = StandardScaler().fit(selected_data)

        st.write(f"Selected Features: {selected_features}")
        clustered_data, score, df_value_scaled, labels, model = perform_clustering(selected_data, n_clusters)
        st.write(f"Number of Clusters: {n_clusters}")
        st.write(f"Silhouette Score: {score}")
        st.write("Clustered Data")
        st.write(clustered_data)
        st.write("Cluster Visualization")
        plot_clusters(df_value_scaled, labels)

        # Store everything the prediction form needs in session state
        st.session_state.selected_features = selected_features
        st.session_state.model = model
        st.session_state.clustered_data = clustered_data
        st.session_state.labels = labels
        st.session_state.df_value_scaled = df_value_scaled
        st.session_state.scaler = input_scaler
        st.session_state.trained_dataset = dataset_choice

    # Predict new data based on selected features. The scaler and the trained
    # dataset name are required too, so state written by an older version of
    # this page (without them) simply asks for a fresh clustering run.
    required_state = ('selected_features', 'model', 'scaler', 'trained_dataset')
    if all(key in st.session_state for key in required_state):
        st.write("### Predict Cluster")

        trained_dataset = st.session_state.trained_dataset
        if trained_dataset != dataset_choice:
            st.info(
                f"The current model was trained on the {trained_dataset} dataset. "
                f"Press 'Cluster' to retrain on {dataset_choice}."
            )

        # Defaults must match the dataset the selected features came from.
        defaults_by_dataset = {
            "Insurance": insurance_defaults,
            "Banking": banking_defaults,
            "Retail": retail_defaults,
        }
        feature_defaults = defaults_by_dataset.get(trained_dataset, {})

        # Use st.form to handle input fields
        with st.form(key='prediction_form'):
            user_input = {}
            for feature in st.session_state.selected_features:
                default_value = feature_defaults.get(feature, 0.0)
                user_input[feature] = st.number_input(f'Enter {feature}', value=default_value)

            submit_button = st.form_submit_button(label='Predict')

            if submit_button:
                user_df = pd.DataFrame(user_input, index=[0])

                # transform only: reuse the statistics learned from the
                # training features instead of refitting on a single row.
                user_df_scaled = st.session_state.scaler.transform(user_df)

                cluster = st.session_state.model.predict(user_df_scaled)
                st.write(f'The predicted cluster for the input data is: {cluster[0]}')

                # Get the clustered data and labels from session state
                clustered_data = st.session_state.clustered_data
                labels = st.session_state.labels
                df_value_scaled = st.session_state.df_value_scaled

                # Summarize cluster characteristics
                summary = summarize_cluster_characteristics(clustered_data, labels, cluster[0])

                # Generate and display the inference
                inference = f"Based on the input features, the customer belongs to Cluster {cluster[0]}, which is characterized by the following average values:\n"
                inference += "".join(
                    f"- {feature}: {value:.2f}\n" for feature, value in summary.items()
                )
                st.write(inference)

                plot_clusters(df_value_scaled, labels, new_data_point=user_df_scaled)


# Main app structure
def main():
    """Draw the page title and dispatch to the section picked in the sidebar menu."""
    st.title("Customer Segmentation Demo")
    st.header("Customer Segmentation")

    # Sidebar menu options, each mapped to the function that renders it
    pages = {
        "Business Understanding": display_business_understanding,
        "Dataset": display_dataset_selection,
        "Modeling & Prediction": display_modeling_evaluation,
    }
    choice = st.sidebar.selectbox('Menu', list(pages))

    render_page = pages.get(choice)
    if render_page is not None:
        render_page()

# Build the app only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()