Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -98,18 +98,18 @@ def main():
|
|
98 |
insurance_claims = pd.read_csv(selected_csv)
|
99 |
|
100 |
num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
|
101 |
-
|
102 |
st.write("Rows to be processed: " + str(num_rows))
|
103 |
|
104 |
st.header("Inference Description")
|
105 |
-
|
106 |
|
107 |
-
cat_col =
|
108 |
-
num_col =
|
109 |
|
110 |
# insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
|
111 |
# Calculate the correlation matrix
|
112 |
-
corr_matrix =
|
113 |
# Create a Matplotlib figure
|
114 |
fig, ax = plt.subplots(figsize=(12, 8))
|
115 |
# Create a heatmap using seaborn
|
@@ -120,19 +120,19 @@ def main():
|
|
120 |
# Display the heatmap in Streamlit
|
121 |
st.pyplot(fig)
|
122 |
|
123 |
-
all_columns =
|
124 |
selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
|
125 |
|
126 |
if st.button("Prediction"):
|
127 |
-
|
128 |
|
129 |
-
s = setup(
|
130 |
# remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
|
131 |
transformation=p_transformation,
|
132 |
normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
|
133 |
exp_clustering = ClusteringExperiment()
|
134 |
# init setup on exp
|
135 |
-
exp_clustering.setup(
|
136 |
|
137 |
with st.spinner("Analyzing..."):
|
138 |
# train kmeans model
|
|
|
98 |
insurance_claims = pd.read_csv(selected_csv)
|
99 |
|
100 |
num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
|
101 |
+
insurance_claims_reduced = insurance_claims.head(num_rows)
|
102 |
st.write("Rows to be processed: " + str(num_rows))
|
103 |
|
104 |
st.header("Inference Description")
|
105 |
+
insurance_claims_reduced.describe().T
|
106 |
|
107 |
+
cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
|
108 |
+
num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns
|
109 |
|
110 |
# insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
|
111 |
# Calculate the correlation matrix
|
112 |
+
corr_matrix = insurance_claims_reduced[num_col].corr()
|
113 |
# Create a Matplotlib figure
|
114 |
fig, ax = plt.subplots(figsize=(12, 8))
|
115 |
# Create a heatmap using seaborn
|
|
|
120 |
# Display the heatmap in Streamlit
|
121 |
st.pyplot(fig)
|
122 |
|
123 |
+
all_columns = insurance_claims_reduced.columns.tolist()
|
124 |
selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
|
125 |
|
126 |
if st.button("Prediction"):
|
127 |
+
insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
|
128 |
|
129 |
+
s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
|
130 |
# remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
|
131 |
transformation=p_transformation,
|
132 |
normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
|
133 |
exp_clustering = ClusteringExperiment()
|
134 |
# init setup on exp
|
135 |
+
exp_clustering.setup(insurance_claims_reduced, session_id = 123)
|
136 |
|
137 |
with st.spinner("Analyzing..."):
|
138 |
# train kmeans model
|