Spencer525 committed on
Commit
d80e71f
1 Parent(s): 86685cd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Create a Streamlit app for data analysis
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Analysis settings (same values the script has always used).
N_CLUSTERS = 3
LOF_NEIGHBORS = 20
ISOLATION_CONTAMINATION = 0.1
DBSCAN_EPS = 0.5
DBSCAN_MIN_SAMPLES = 5
RANDOM_STATE = 42


def _show_pca_scatter(points, title, **scatter_kwargs):
    """Render a scatter plot of the 2-D PCA projection in the Streamlit page.

    Args:
        points: Array with at least two columns; columns 0 and 1 are plotted.
        title: Title shown above the axes.
        **scatter_kwargs: Extra keyword arguments forwarded to ``ax.scatter``
            (for example ``c`` and ``cmap``).
    """
    fig, ax = plt.subplots()
    ax.scatter(points[:, 0], points[:, 1], edgecolor='k', s=50, **scatter_kwargs)
    ax.set_title(title)
    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed, and
    # Streamlit re-executes this script on each interaction, so figures
    # would otherwise accumulate in memory.
    plt.close(fig)


def _silhouette_or_none(data, labels):
    """Return the silhouette score, or ``None`` when it is undefined.

    ``silhouette_score`` raises ``ValueError`` unless the number of distinct
    labels is between 2 and ``n_samples - 1``; that case is reported as
    ``None`` instead of crashing the app.
    """
    n_labels = np.unique(labels).size
    if 2 <= n_labels <= len(labels) - 1:
        return silhouette_score(data, labels)
    return None


def _run_analysis(df):
    """Run PCA, clustering and outlier detection on ``df`` and display results.

    Only numeric columns are used. Rows containing missing or infinite values
    are dropped (with a warning) because the estimators below reject them.
    Shows an error and returns early when the data is too small to analyse.

    Args:
        df: The DataFrame loaded from the uploaded CSV file.
    """
    # Exclude non-numeric columns for analysis
    numeric_df = df.select_dtypes(include=[np.number])

    # Treat +/-inf as missing, discard columns with no usable values, then
    # drop incomplete rows so PCA/KMeans do not raise on NaN.
    numeric_df = numeric_df.replace([np.inf, -np.inf], np.nan)
    numeric_df = numeric_df.dropna(axis=1, how='all')
    rows_before = len(numeric_df)
    numeric_df = numeric_df.dropna()
    rows_dropped = rows_before - len(numeric_df)
    if rows_dropped:
        st.warning(
            f"Dropped {rows_dropped} row(s) containing missing or infinite values."
        )

    n_samples, n_features = numeric_df.shape
    if n_features < 2:
        st.error("At least two numeric columns are required for the analysis.")
        return
    if n_samples < N_CLUSTERS:
        st.error(
            f"At least {N_CLUSTERS} complete rows are required for the analysis."
        )
        return

    # Standardize the data
    scaled_data = StandardScaler().fit_transform(numeric_df)

    # PCA
    pca_result = PCA(n_components=2).fit_transform(scaled_data)
    _show_pca_scatter(pca_result, 'PCA Result', c='blue')

    # KMeans Clustering
    kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=RANDOM_STATE)
    clusters = kmeans.fit_predict(scaled_data)
    _show_pca_scatter(pca_result, 'KMeans Clustering', c=clusters, cmap='viridis')

    # Silhouette Score
    silhouette_avg = _silhouette_or_none(scaled_data, clusters)
    if silhouette_avg is None:
        st.warning(
            "Silhouette Score is undefined: KMeans did not produce a valid "
            "number of distinct clusters for this data."
        )
    else:
        st.write('Silhouette Score:', silhouette_avg)

    # Local Outlier Factor (LOF)
    # n_neighbors must be smaller than the number of samples; clamp it here
    # rather than relying on the library's warning-and-adjust fallback.
    lof = LocalOutlierFactor(n_neighbors=min(LOF_NEIGHBORS, n_samples - 1))
    lof_labels = lof.fit_predict(scaled_data)
    lof_outliers = int(np.count_nonzero(lof_labels == -1))
    st.write("Number of outliers detected by LOF:", lof_outliers)

    # Isolation Forest
    isolation_forest = IsolationForest(
        contamination=ISOLATION_CONTAMINATION, random_state=RANDOM_STATE
    )
    isolation_labels = isolation_forest.fit_predict(scaled_data)
    isolation_outliers = int(np.count_nonzero(isolation_labels == -1))
    st.write("Number of outliers detected by Isolation Forest:", isolation_outliers)

    # DBSCAN
    dbscan = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)
    dbscan_labels = dbscan.fit_predict(scaled_data)
    # DBSCAN frequently yields a single label (e.g. every point marked as
    # noise, -1), for which the silhouette score cannot be computed.
    silhouette_dbscan = _silhouette_or_none(scaled_data, dbscan_labels)
    if silhouette_dbscan is None:
        n_noise = int(np.count_nonzero(dbscan_labels == -1))
        n_found = np.unique(dbscan_labels[dbscan_labels != -1]).size
        st.warning(
            "DBSCAN Silhouette Score is undefined: "
            f"found {n_found} cluster(s) and {n_noise} noise point(s)."
        )
    else:
        st.write("DBSCAN Silhouette Score:", silhouette_dbscan)


# Streamlit app
st.title('Data Analysis with Streamlit')

# File uploader
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    # Read the uploaded CSV file; report unreadable input in the page
    # instead of surfacing a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
    else:
        st.write("Data loaded successfully.")
        st.write(df.head())
        _run_analysis(df)

# To run this Streamlit app, save it as a .py file and execute it using the command: streamlit run <filename>.py
# Console message kept from the original script so stdout output is unchanged.
print("Streamlit app code generated.")