dperales committed on
Commit
495d0f4
·
1 Parent(s): 8c582a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -95
app.py CHANGED
@@ -109,102 +109,102 @@ def main():
109
  selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
110
  insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
111
 
112
- with st.expander("Inference Description", expanded=True):
113
- insurance_claims_reduced.describe().T
114
-
115
- with st.expander("Head Map", expanded=True):
116
- cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
117
- num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns
118
-
119
- # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
120
- # Calculate the correlation matrix
121
- corr_matrix = insurance_claims_reduced[num_col].corr()
122
- # Create a Matplotlib figure
123
- fig, ax = plt.subplots(figsize=(12, 8))
124
- # Create a heatmap using seaborn
125
- #st.header("Heat Map")
126
- sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
127
- # Set the title for the heatmap
128
- ax.set_title('Correlation Heatmap')
129
- # Display the heatmap in Streamlit
130
- st.pyplot(fig)
131
-
132
- if st.button("Prediction"):
133
- #insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
134
-
135
- s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
136
- # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
137
- transformation=p_transformation,
138
- normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
139
- exp_clustering = ClusteringExperiment()
140
- # init setup on exp
141
- exp_clustering.setup(insurance_claims_reduced, session_id = 123)
142
-
143
- with st.spinner("Analyzing..."):
144
- #with col2:
145
- #st.markdown("<br><br><br><br>", unsafe_allow_html=True)
146
- # train kmeans model
147
- cluster_model = create_model(selected_model, num_clusters = selected_clusters)
148
-
149
- cluster_model_2 = assign_model(cluster_model)
150
- # Calculate summary statistics for each cluster
151
- cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max',
152
- 'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
153
- ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
154
 
155
- with st.expander("Cluster Summary", expanded=False):
156
- #st.header("Cluster Summary")
157
- cluster_summary
158
-
159
- with st.expander("Model Assign", expanded=False):
160
- #st.header("Assign Model")
161
- cluster_model_2
162
-
163
- # all_metrics = get_metrics()
164
- # all_metrics
165
-
166
- with st.expander("Clustering Metrics", expanded=False):
167
- #st.header("Clustering Metrics")
168
- cluster_results = pull()
169
- cluster_results
170
-
171
- with st.expander("Clustering Plots", expanded=False):
172
- if graph_select:
173
- #st.header("Clustering Plots")
174
- # plot pca cluster plot
175
- plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
176
-
177
- if selected_model != 'ap':
178
- plot_model(cluster_model, plot = 'tsne', display_format = 'streamlit')
179
-
180
- if selected_model not in ('ap', 'meanshift', 'dbscan', 'optics'):
181
- plot_model(cluster_model, plot = 'elbow', display_format = 'streamlit')
182
-
183
- if selected_model not in ('ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics'):
184
- plot_model(cluster_model, plot = 'silhouette', display_format = 'streamlit')
185
-
186
- if selected_model not in ('ap', 'sc', 'hclust', 'dbscan', 'optics', 'birch'):
187
- plot_model(cluster_model, plot = 'distance', display_format = 'streamlit')
188
-
189
- if selected_model != 'ap':
190
- plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')
191
-
192
- with st.expander("Feature Importance", expanded=False):
193
- # Create a Classification Model to extract feature importance
194
- if graph_select and feat_imp_select:
195
- #st.header("Feature Importance")
196
- from pycaret.classification import setup, create_model, get_config
197
- s = setup(cluster_model_2, target = 'Cluster')
198
- lr = create_model('lr')
199
-
200
- # this is how you can recreate the table
201
- feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
202
- # sort by feature importance value and filter top 10
203
- feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
204
- # Display the filtered table in Streamlit
205
- # st.dataframe(feat_imp)
206
- # Display the filtered table as a bar chart in Streamlit
207
- st.bar_chart(feat_imp.set_index('Feature'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  elif page == "Anomaly Detection":
210
  #with col1:
 
109
  selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
110
  insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
111
 
112
+ with st.expander("Inference Description", expanded=True):
113
+ insurance_claims_reduced.describe().T
114
+
115
+ with st.expander("Head Map", expanded=True):
116
+ cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
117
+ num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns
118
+
119
+ # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
120
+ # Calculate the correlation matrix
121
+ corr_matrix = insurance_claims_reduced[num_col].corr()
122
+ # Create a Matplotlib figure
123
+ fig, ax = plt.subplots(figsize=(12, 8))
124
+ # Create a heatmap using seaborn
125
+ #st.header("Heat Map")
126
+ sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
127
+ # Set the title for the heatmap
128
+ ax.set_title('Correlation Heatmap')
129
+ # Display the heatmap in Streamlit
130
+ st.pyplot(fig)
131
+
132
+ if st.button("Prediction"):
133
+ #insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
+ s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
136
+ # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
137
+ transformation=p_transformation,
138
+ normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
139
+ exp_clustering = ClusteringExperiment()
140
+ # init setup on exp
141
+ exp_clustering.setup(insurance_claims_reduced, session_id = 123)
142
+
143
+ with st.spinner("Analyzing..."):
144
+ #with col2:
145
+ #st.markdown("<br><br><br><br>", unsafe_allow_html=True)
146
+ # train kmeans model
147
+ cluster_model = create_model(selected_model, num_clusters = selected_clusters)
148
+
149
+ cluster_model_2 = assign_model(cluster_model)
150
+ # Calculate summary statistics for each cluster
151
+ cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max',
152
+ 'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
153
+ ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
154
+
155
+ with st.expander("Cluster Summary", expanded=False):
156
+ #st.header("Cluster Summary")
157
+ cluster_summary
158
+
159
+ with st.expander("Model Assign", expanded=False):
160
+ #st.header("Assign Model")
161
+ cluster_model_2
162
+
163
+ # all_metrics = get_metrics()
164
+ # all_metrics
165
+
166
+ with st.expander("Clustering Metrics", expanded=False):
167
+ #st.header("Clustering Metrics")
168
+ cluster_results = pull()
169
+ cluster_results
170
+
171
+ with st.expander("Clustering Plots", expanded=False):
172
+ if graph_select:
173
+ #st.header("Clustering Plots")
174
+ # plot pca cluster plot
175
+ plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
176
+
177
+ if selected_model != 'ap':
178
+ plot_model(cluster_model, plot = 'tsne', display_format = 'streamlit')
179
+
180
+ if selected_model not in ('ap', 'meanshift', 'dbscan', 'optics'):
181
+ plot_model(cluster_model, plot = 'elbow', display_format = 'streamlit')
182
+
183
+ if selected_model not in ('ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics'):
184
+ plot_model(cluster_model, plot = 'silhouette', display_format = 'streamlit')
185
+
186
+ if selected_model not in ('ap', 'sc', 'hclust', 'dbscan', 'optics', 'birch'):
187
+ plot_model(cluster_model, plot = 'distance', display_format = 'streamlit')
188
+
189
+ if selected_model != 'ap':
190
+ plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')
191
+
192
+ with st.expander("Feature Importance", expanded=False):
193
+ # Create a Classification Model to extract feature importance
194
+ if graph_select and feat_imp_select:
195
+ #st.header("Feature Importance")
196
+ from pycaret.classification import setup, create_model, get_config
197
+ s = setup(cluster_model_2, target = 'Cluster')
198
+ lr = create_model('lr')
199
+
200
+ # this is how you can recreate the table
201
+ feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
202
+ # sort by feature importance value and filter top 10
203
+ feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
204
+ # Display the filtered table in Streamlit
205
+ # st.dataframe(feat_imp)
206
+ # Display the filtered table as a bar chart in Streamlit
207
+ st.bar_chart(feat_imp.set_index('Feature'))
208
 
209
  elif page == "Anomaly Detection":
210
  #with col1: