File size: 7,695 Bytes
346c8ea c41f726 c79584f 3cb6066 346c8ea c41f726 efb850b 346c8ea 1107b8f 128b5df efb850b 346c8ea c41f726 346c8ea c79584f 346c8ea c41f726 346c8ea c79584f 346c8ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import streamlit as st
import pandas as pd
import seaborn as sns
import numpy as np
import pickle
import matplotlib.pyplot as plt
from data_preparation import preprocess_data,data_imp
from clustering import perform_clustering, plot_clusters,summarize_cluster_characteristics
from feature_selection import select_features_pca, select_features_rfe, select_features_rf
from sklearn.preprocessing import StandardScaler
# Per-dataset feature descriptions and default input values, unpacked in the
# order data_imp() returns them.
(
    insurance_feature_descriptions,
    bankng_feature_descriptions,
    retail_feature_descriptions,
    insurance_defaults,
    banking_defaults,
    retail_defaults,
) = data_imp()
def load_data(dataset_choice):
    """Load the raw dataset that matches the user's menu selection.

    Args:
        dataset_choice: One of "Insurance", "Retail" or "Banking".

    Returns:
        The DataFrame read from the corresponding file in the working
        directory (all files are decoded as latin1).

    Raises:
        ValueError: If ``dataset_choice`` is not one of the known datasets.
    """
    if dataset_choice == "Insurance":
        return pd.read_sas('a2z_insurance.sas7bdat', encoding='latin1')
    if dataset_choice == "Retail":
        return pd.read_csv('retaildata.csv', encoding='latin1')
    if dataset_choice == "Banking":
        return pd.read_csv('bankingdata.csv', encoding='latin1')
    # Fail with a clear message instead of an UnboundLocalError on `data`.
    raise ValueError(
        f"Unknown dataset choice: {dataset_choice!r}; "
        "expected 'Insurance', 'Retail' or 'Banking'"
    )
# Function to display Business Understanding section
def display_business_understanding():
    """Render the "Business Objective" section: heading, markdown text and image."""
    # The markdown lines stay flush-left so they are not rendered as a code block.
    objective_markdown = """
###### Customer segmentation is a fundamental task in marketing and customer relationship management. With the advancements in data analytics and machine learning, it is now possible to group customers into distinct segments with a high degree of precision, allowing businesses to tailor their marketing strategies and offerings to each segment's unique needs and preferences.
###### Through this customer segmentation, businesses can achieve:
- **Personalization**: Tailoring marketing strategies to meet the unique needs of each segment.
- **Optimization**: Efficient allocation of marketing resources.
- **Insight**: Gaining a deeper understanding of the customer base.
- **Engagement**: Enhancing customer engagement and satisfaction.
###### => Problem/Requirement: Utilize machine learning and data analysis techniques in Python to perform customer segmentation.
"""
    st.subheader("Business Objective")
    st.write(objective_markdown)
    st.image(
        "Customer-Segmentation.png",
        caption="Customer Segmentation",
        use_column_width=True,
    )
# Function to display Dataset section
def display_dataset_selection():
    """Let the user pick a dataset and show its size, a preview and its feature descriptions.

    Returns:
        The data returned by ``load_data`` for the selected dataset.
    """
    available_datasets = ("Insurance", "Retail", "Banking")
    dataset_choice = st.selectbox("Select Dataset", available_datasets)
    data = load_data(dataset_choice)

    # Basic facts about the loaded data, followed by a preview.
    st.write(f"Dataset: {dataset_choice}")
    st.write("Number of rows:", data.shape[0])
    st.write("Number of columns:", data.shape[1])
    st.write("First five rows of the data:")
    st.write(data.head())

    # Any choice other than Insurance/Retail falls back to the banking text.
    descriptions_by_dataset = {
        "Insurance": insurance_feature_descriptions,
        "Retail": retail_feature_descriptions,
    }
    st.write(descriptions_by_dataset.get(dataset_choice, bankng_feature_descriptions))
    return data
# Function to display Modeling & Evaluation section
def display_modeling_evaluation():
    """Render the modeling page: feature selection, clustering and cluster prediction.

    The user picks a dataset plus, in the sidebar, a feature-selection method,
    its size and the number of clusters. Pressing "Cluster" runs the pipeline,
    shows the results and stores the fitted objects in ``st.session_state`` so
    the prediction form below survives Streamlit reruns.
    """
    dataset_choice = st.selectbox("Select Dataset", ("Insurance", "Retail", "Banking"))
    data = load_data(dataset_choice)
    data = preprocess_data(data)

    # Sidebar for feature selection and clustering method
    st.sidebar.header("Feature Selection and Clustering Method")
    feature_selection_method = st.sidebar.selectbox(
        "Select feature selection method", ('PCA', 'RFE', 'Random Forest')
    )
    n_clusters = st.sidebar.slider("Number of clusters", min_value=2, max_value=10, value=3)
    if feature_selection_method == 'PCA':
        n_components = st.sidebar.slider(
            "Number of PCA components", min_value=2, max_value=10, value=5
        )
    elif feature_selection_method in ['RFE', 'Random Forest']:
        n_features_to_select = st.sidebar.slider(
            "Number of features to select", min_value=2, max_value=10, value=5
        )

    # Perform clustering on button click
    if st.sidebar.button("Cluster"):
        if feature_selection_method == 'PCA':
            selected_data, selected_features = select_features_pca(data, n_components)
        elif feature_selection_method == 'RFE':
            selected_data, selected_features = select_features_rfe(data, n_features_to_select)
        elif feature_selection_method == 'Random Forest':
            selected_data, selected_features = select_features_rf(data, n_features_to_select)

        st.write(f"Selected Features: {selected_features}")
        clustered_data, score, df_value_scaled, labels, model = perform_clustering(
            selected_data, n_clusters
        )
        st.write(f"Number of Clusters: {n_clusters}")
        st.write(f"Silhouette Score: {score}")
        st.write("Clustered Data")
        st.write(clustered_data)
        st.write("Cluster Visualization")
        plot_clusters(df_value_scaled, labels)

        # Store selected features and model in session state
        st.session_state.selected_features = selected_features
        st.session_state.model = model
        st.session_state.clustered_data = clustered_data
        st.session_state.labels = labels
        st.session_state.df_value_scaled = df_value_scaled
        # Keep a scaler fitted on the training features so new inputs can be
        # put on the same scale later.
        # NOTE(review): assumes perform_clustering standardizes selected_data
        # column-wise before fitting the model - confirm against clustering.py.
        st.session_state.scaler = _fit_input_scaler(selected_data, selected_features)

    # Predict new data based on selected features
    required_state = ('selected_features', 'model', 'scaler')
    if all(key in st.session_state for key in required_state):
        st.write("### Predict Cluster")
        # Use st.form to handle input fields
        with st.form(key='prediction_form'):
            user_input = {}
            for feature in st.session_state.selected_features:
                user_input[feature] = st.number_input(
                    f'Enter {feature}',
                    value=_default_input_value(dataset_choice, feature),
                )
            submit_button = st.form_submit_button(label='Predict')

        if submit_button:
            user_df = pd.DataFrame(user_input, index=[0])
            # Only transform here: fitting a scaler on this single row would
            # subtract the row from itself and always yield an all-zero vector.
            user_df_scaled = st.session_state.scaler.transform(
                user_df.to_numpy(dtype=float)
            )
            cluster = st.session_state.model.predict(user_df_scaled)
            st.write(f'The predicted cluster for the input data is: {cluster[0]}')

            # Get the clustered data and labels from session state
            clustered_data = st.session_state.clustered_data
            labels = st.session_state.labels
            df_value_scaled = st.session_state.df_value_scaled

            # Summarize cluster characteristics
            summary = summarize_cluster_characteristics(clustered_data, labels, cluster[0])

            # Generate and display the inference
            inference = f"Based on the input features, the customer belongs to Cluster {cluster[0]}, which is characterized by the following average values:\n"
            inference += "".join(
                f"- {feature}: {value:.2f}\n" for feature, value in summary.items()
            )
            st.write(inference)
            plot_clusters(df_value_scaled, labels, new_data_point=user_df_scaled)


def _fit_input_scaler(selected_data, selected_features):
    """Fit a StandardScaler on the training features, ordered like the input form."""
    training = selected_data
    if isinstance(training, pd.DataFrame):
        columns = list(selected_features)
        # Align column order with the form when the names are available.
        if all(column in training.columns for column in columns):
            training = training[columns]
    return StandardScaler().fit(np.asarray(training, dtype=float))


def _default_input_value(dataset_choice, feature):
    """Return the pre-filled form value for a feature of the chosen dataset (0.0 if unknown)."""
    defaults_by_dataset = {
        "Insurance": insurance_defaults,
        "Banking": banking_defaults,
        "Retail": retail_defaults,
    }
    defaults = defaults_by_dataset.get(dataset_choice)
    if defaults is None:
        return 0.0
    return defaults.get(feature, 0.0)
# Main app structure
def main():
    """Draw the page title and route to the section chosen in the sidebar menu."""
    st.title("Customer Segmentation Demo")
    st.header("Customer Segmentation")

    # Each sidebar menu entry maps to the function that renders that section.
    sections = {
        "Business Understanding": display_business_understanding,
        "Dataset": display_dataset_selection,
        "Modeling & Prediction": display_modeling_evaluation,
    }
    choice = st.sidebar.selectbox('Menu', list(sections))
    render_section = sections.get(choice)
    if render_section is not None:
        render_section()
# Build the UI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|