dperales committed on
Commit
72bc4dc
·
1 Parent(s): 1fd59fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -98,18 +98,18 @@ def main():
98
  insurance_claims = pd.read_csv(selected_csv)
99
 
100
  num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
101
- insurance_claims = insurance_claims.head(num_rows)
102
  st.write("Rows to be processed: " + str(num_rows))
103
 
104
  st.header("Inference Description")
105
- insurance_claims.describe().T
106
 
107
- cat_col = insurance_claims.select_dtypes(include=['object']).columns
108
- num_col = insurance_claims.select_dtypes(exclude=['object']).columns
109
 
110
  # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
111
  # Calculate the correlation matrix
112
- corr_matrix = insurance_claims[num_col].corr()
113
  # Create a Matplotlib figure
114
  fig, ax = plt.subplots(figsize=(12, 8))
115
  # Create a heatmap using seaborn
@@ -120,19 +120,19 @@ def main():
120
  # Display the heatmap in Streamlit
121
  st.pyplot(fig)
122
 
123
- all_columns = insurance_claims.columns.tolist()
124
  selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
125
 
126
  if st.button("Prediction"):
127
- insurance_claims = insurance_claims[selected_columns].copy()
128
 
129
- s = setup(insurance_claims, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
130
  # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
131
  transformation=p_transformation,
132
  normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
133
  exp_clustering = ClusteringExperiment()
134
  # init setup on exp
135
- exp_clustering.setup(insurance_claims, session_id = 123)
136
 
137
  with st.spinner("Analyzing..."):
138
  # train kmeans model
 
98
  insurance_claims = pd.read_csv(selected_csv)
99
 
100
  num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
101
+ insurance_claims_reduced = insurance_claims.head(num_rows)
102
  st.write("Rows to be processed: " + str(num_rows))
103
 
104
  st.header("Inference Description")
105
+ insurance_claims_reduced.describe().T
106
 
107
+ cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
108
+ num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns
109
 
110
  # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
111
  # Calculate the correlation matrix
112
+ corr_matrix = insurance_claims_reduced[num_col].corr()
113
  # Create a Matplotlib figure
114
  fig, ax = plt.subplots(figsize=(12, 8))
115
  # Create a heatmap using seaborn
 
120
  # Display the heatmap in Streamlit
121
  st.pyplot(fig)
122
 
123
+ all_columns = insurance_claims_reduced.columns.tolist()
124
  selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
125
 
126
  if st.button("Prediction"):
127
+ insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
128
 
129
+ s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
130
  # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
131
  transformation=p_transformation,
132
  normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
133
  exp_clustering = ClusteringExperiment()
134
  # init setup on exp
135
+ exp_clustering.setup(insurance_claims_reduced, session_id = 123)
136
 
137
  with st.spinner("Analyzing..."):
138
  # train kmeans model