# INd_test_0906 / app.py
# Spencer525's picture
# Create app.py
# d80e71f verified
# Create a Streamlit app for data analysis
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
# Analysis parameters, kept in one place so the input guards below stay in
# sync with the estimators that consume them.
_N_CLUSTERS = 3
_LOF_NEIGHBORS = 20


def _show_projection(points, title, **scatter_kwargs):
    """Draw a scatter plot of the first two columns of *points* in the page.

    Args:
        points: 2-D array; column 0 is plotted on x and column 1 on y.
        title: Title shown above the axes.
        **scatter_kwargs: Extra keyword arguments forwarded to ``ax.scatter``
            (for example ``c`` and ``cmap``).
    """
    fig, ax = plt.subplots()
    ax.scatter(points[:, 0], points[:, 1], edgecolor='k', s=50, **scatter_kwargs)
    ax.set_title(title)
    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')
    st.pyplot(fig)
    # The script is re-executed on every user interaction; without closing,
    # each run would leave another figure registered with pyplot.
    plt.close(fig)


def _silhouette_or_none(data, labels):
    """Return the silhouette score, or ``None`` when it is undefined.

    ``silhouette_score`` raises ``ValueError`` unless the number of distinct
    labels is between 2 and ``n_samples - 1``; this wrapper checks that
    condition first so callers can show a message instead of crashing.
    """
    n_labels = np.unique(labels).size
    if 2 <= n_labels <= len(labels) - 1:
        return silhouette_score(data, labels)
    return None


def _run_analysis(df):
    """Run PCA, clustering and outlier detection on *df* and render results.

    Only numeric columns are used. Rows containing missing or infinite
    numeric values are ignored because the estimators below reject them.
    If too little usable data remains, a warning is shown and the analysis
    is skipped.

    Args:
        df: The DataFrame loaded from the uploaded CSV file.
    """
    # Exclude non-numeric columns for analysis
    numeric_df = df.select_dtypes(include=[np.number])

    # Treat +/-inf like missing values, discard columns that carry no data at
    # all (they would otherwise cause every row to be dropped), then drop the
    # remaining incomplete rows.
    numeric_df = numeric_df.replace([np.inf, -np.inf], np.nan)
    numeric_df = numeric_df.dropna(axis=1, how='all')
    clean_df = numeric_df.dropna(axis=0)
    dropped_rows = len(numeric_df) - len(clean_df)
    if dropped_rows:
        st.info(
            f"Ignoring {dropped_rows} row(s) with missing or infinite numeric values."
        )

    n_samples, n_features = clean_df.shape
    if n_features < 2:
        # PCA below projects onto two components, which needs two features.
        st.warning("At least two numeric columns are required for the analysis.")
        return
    min_rows = max(2, _N_CLUSTERS)
    if n_samples < min_rows:
        # KMeans cannot form more clusters than there are samples.
        st.warning(
            f"At least {min_rows} complete numeric rows are required for the analysis."
        )
        return

    # Standardize the data
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(clean_df)

    # PCA
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(scaled_data)
    _show_projection(pca_result, 'PCA Result', c='blue')

    # KMeans Clustering
    kmeans = KMeans(n_clusters=_N_CLUSTERS, random_state=42)
    clusters = kmeans.fit_predict(scaled_data)
    _show_projection(pca_result, 'KMeans Clustering', c=clusters, cmap='viridis')

    # Silhouette Score
    silhouette_avg = _silhouette_or_none(scaled_data, clusters)
    if silhouette_avg is None:
        st.write('Silhouette Score: not defined for this clustering (too few samples or clusters).')
    else:
        st.write('Silhouette Score:', silhouette_avg)

    # Local Outlier Factor (LOF)
    # A sample cannot have more neighbours than there are other samples.
    lof = LocalOutlierFactor(n_neighbors=min(_LOF_NEIGHBORS, n_samples - 1))
    lof_labels = lof.fit_predict(scaled_data)
    lof_outliers = np.sum(lof_labels == -1)
    st.write("Number of outliers detected by LOF:", lof_outliers)

    # Isolation Forest
    isolation_forest = IsolationForest(contamination=0.1, random_state=42)
    isolation_labels = isolation_forest.fit_predict(scaled_data)
    isolation_outliers = np.sum(isolation_labels == -1)
    st.write("Number of outliers detected by Isolation Forest:", isolation_outliers)

    # DBSCAN
    dbscan = DBSCAN(eps=0.5, min_samples=5)
    dbscan_labels = dbscan.fit_predict(scaled_data)
    # DBSCAN may label every point as noise (-1) or put everything in one
    # cluster; the silhouette score is undefined in both cases.
    silhouette_dbscan = _silhouette_or_none(scaled_data, dbscan_labels)
    if silhouette_dbscan is None:
        n_noise = int(np.sum(dbscan_labels == -1))
        n_found = int(np.unique(dbscan_labels[dbscan_labels != -1]).size)
        st.write(
            f"DBSCAN Silhouette Score: not defined "
            f"(DBSCAN found {n_found} cluster(s) and {n_noise} noise point(s))."
        )
    else:
        st.write("DBSCAN Silhouette Score:", silhouette_dbscan)


# Streamlit app
st.title('Data Analysis with Streamlit')

# File uploader
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    # Read the uploaded CSV file
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Report unreadable uploads in the page instead of a raw traceback.
        st.error(f"Could not read the uploaded CSV file: {exc}")
    else:
        st.write("Data loaded successfully.")
        st.write(df.head())
        _run_analysis(df)

# To run this Streamlit app, save it as a .py file and execute it using the command: streamlit run <filename>.py
# NOTE(review): this print runs on every execution of the script and only
# reaches the server console; it looks like a leftover — confirm before removing.
print("Streamlit app code generated.")