Spencer525 committed on
Commit
cfab2e3
·
verified ·
1 Parent(s): 2ff6e87

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.preprocessing import StandardScaler
8
+ from sklearn.tree import DecisionTreeClassifier
9
+ from sklearn.ensemble import RandomForestClassifier
10
+ from xgboost import XGBClassifier
11
+ from sklearn.inspection import permutation_importance
12
+ from sklearn.feature_selection import mutual_info_classif
13
+ import io
14
+ import base64
15
+
16
+ # Function to create a download link
17
def get_download_link(data, filename, text, mime_type=None):
    """Build an HTML anchor that downloads ``data`` through a base64 data URI.

    Args:
        data: Raw bytes of the file to embed in the link.
        filename: Name suggested to the browser via the ``download``
            attribute; also used to guess the MIME type.
        text: Visible link text (treated as plain text and HTML-escaped).
        mime_type: Optional explicit MIME type. When omitted it is guessed
            from the extension of ``filename`` and falls back to
            ``application/octet-stream`` if the extension is unknown.

    Returns:
        The ``<a>`` element as a string, ready for
        ``st.markdown(..., unsafe_allow_html=True)``.
    """
    # Function-scope imports so this helper does not depend on edits to the
    # file-level import block.
    import html
    import mimetypes

    if mime_type is None:
        # The previous hard-coded "file/csv" is not a real MIME type and was
        # also wrong for the PNG and XLSX payloads this app produces.
        guessed, _ = mimetypes.guess_type(filename)
        mime_type = guessed or "application/octet-stream"

    b64 = base64.b64encode(data).decode("ascii")

    # The result is rendered as raw HTML, so escape everything interpolated
    # into markup to keep quotes/angle brackets from breaking the tag.
    safe_name = html.escape(filename, quote=True)
    safe_text = html.escape(text)
    return f'<a href="data:{mime_type};base64,{b64}" download="{safe_name}">{safe_text}</a>'
21
+
22
+ # Function to plot correlation matrix
23
def plot_correlation_matrix(data):
    """Render a correlation heatmap of ``data`` in the Streamlit page.

    Only numeric and boolean columns are correlated; other columns (e.g.
    strings) are left out, because ``DataFrame.corr`` raises on them in
    recent pandas releases.

    Args:
        data: DataFrame to analyse.

    Returns:
        The Matplotlib figure that was drawn, or ``None`` when ``data`` has
        no numeric columns. The figure is left open and remains pyplot's
        current figure, so callers that follow up with ``plt.savefig`` keep
        working; closing it is the caller's responsibility.
    """
    numeric_data = data.select_dtypes(include=["number", "bool"])
    if numeric_data.shape[1] == 0:
        # A heatmap of an empty correlation matrix cannot be drawn.
        st.warning("No numeric columns available for a correlation matrix.")
        return None

    # Use an explicit figure/axes instead of pyplot's implicit global state,
    # and hand Streamlit the figure itself rather than the pyplot module.
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', linewidths=0.5, ax=ax)
    ax.set_title('Correlation Matrix')
    fig.tight_layout()
    st.pyplot(fig)
    return fig
29
+
30
+ # Function to calculate feature importance
31
def calculate_feature_importance(X, y):
    """Score every feature in ``X`` against target ``y`` with five methods.

    The data is split 80/20 (seeded), features are standardised using
    statistics from the training split, and the following are computed:

    * impurity-based importances from a decision tree, a random forest and
      an XGBoost classifier (fitted on the training split),
    * permutation importance of the random forest on the held-out split,
    * mutual information between each feature and the target on the
      training split.

    Args:
        X: Feature matrix (DataFrame or 2-D array) with numeric columns.
        y: Class labels, one per row of ``X``.

    Returns:
        dict mapping the method name ("Decision Tree", "Random Forest",
        "XGBoost", "Permutation", "Mutual Information") to an array with
        one score per column of ``X``, in column order.
    """
    # Map labels onto 0..k-1. Recent XGBoost versions reject label sets such
    # as {1, 2} or strings; the importance scores of the other methods do
    # not depend on how classes are named.
    # NOTE(review): a class present only in the held-out split would still
    # leave a gap in the training labels - confirm whether that can occur.
    _, y_encoded = np.unique(np.asarray(y).ravel(), return_inverse=True)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, then apply it to both.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "XGBoost": XGBClassifier(random_state=42),
    }

    importance_dict = {}
    for name, model in models.items():
        model.fit(X_train_scaled, y_train)
        importance_dict[name] = model.feature_importances_

    # Permutation importance: reuse the forest fitted above. The original
    # trained a second forest with identical parameters, seed and data.
    perm_importance = permutation_importance(
        models["Random Forest"], X_test_scaled, y_test, n_repeats=10, random_state=42
    )
    importance_dict["Permutation"] = perm_importance.importances_mean

    # Mutual information: seeded so repeated runs give the same table, like
    # every other estimator in this function.
    importance_dict["Mutual Information"] = mutual_info_classif(
        X_train_scaled, y_train, random_state=42
    )

    return importance_dict
60
+
61
+ # Streamlit app
62
# Page flow: upload a CSV, pick the target column, then on "Analyze" show a
# correlation heatmap and a table of feature-importance scores, each with a
# download link.
st.title('Heart Disease Feature Analysis')

# File upload
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write("Data Preview:")
    st.write(data.head())

    # Select target variable
    target_col = st.selectbox("Select the target variable", data.columns)

    if st.button('Analyze'):
        y = data[target_col]
        candidate_features = data.drop(columns=[target_col])

        # The scaler and models only accept numeric input, so non-numeric
        # columns are skipped (and reported) instead of crashing the run.
        X = candidate_features.select_dtypes(include=["number", "bool"])
        skipped_cols = [col for col in candidate_features.columns if col not in X.columns]
        if skipped_cols:
            st.warning(
                "Skipping non-numeric feature columns: "
                + ", ".join(str(col) for col in skipped_cols)
            )

        # Rows with missing values break several of the estimators, so only
        # complete rows are used for the importance analysis.
        complete_rows = X.notna().all(axis=1) & y.notna()
        if not complete_rows.all():
            st.info(f"Dropping {int((~complete_rows).sum())} rows with missing values.")
            X = X[complete_rows]
            y = y[complete_rows]

        if X.shape[1] == 0 or len(X) == 0:
            st.error("No usable numeric feature data found in the uploaded file.")
            st.stop()

        # Correlation Matrix
        st.subheader('Correlation Matrix')
        plot_correlation_matrix(data.select_dtypes(include=["number", "bool"]))

        # Download correlation matrix as PNG
        # Grab the figure that was just drawn explicitly, and close it after
        # saving so figures do not pile up across Streamlit reruns.
        fig = plt.gcf()
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        plt.close(fig)
        st.markdown(get_download_link(buf.getvalue(), "correlation_matrix.png", "Download Correlation Matrix as PNG"), unsafe_allow_html=True)

        # Feature Importance
        st.subheader('Feature Importance')
        importance_dict = calculate_feature_importance(X, y)

        # Create a DataFrame with all feature importances
        importance_df = pd.DataFrame(importance_dict, index=X.columns)
        st.write(importance_df)

        # Download feature importance as XLSX
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
            importance_df.to_excel(writer, sheet_name='Feature Importance')
        st.markdown(get_download_link(excel_buffer.getvalue(), "feature_importance.xlsx", "Download Feature Importance as XLSX"), unsafe_allow_html=True)

else:
    st.write("Please upload a CSV file to begin the analysis.")