# SAV0918 / app.py
# BOSCOCHEN's picture
# Create app.py
# 9239bc6 verified
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import fcluster
# ================== Load the persisted models ==================
# The artifacts are loaded in the order listed and bound to module-level
# names that process_data() reads.
scaler, pca, kmeans, linked, dbscan = [
    joblib.load(model_path)
    for model_path in (
        'scaler.sav',              # standardization model
        'pca_model.sav',           # PCA model
        'kmeans_model.sav',        # K-means model
        'hierarchical_model.sav',  # hierarchical clustering model (passed to fcluster)
        'dbscan_model.sav',        # DBSCAN model
    )
]
# Plotting helper
def plot_clusters(data, labels, title, filename=None):
    """Draw a PC1/PC2 scatter plot coloured by cluster label and save it as PNG.

    Args:
        data: Object indexable by ``'PC1'`` and ``'PC2'`` (process_data passes
            a pandas DataFrame with those two columns).
        labels: One value per plotted point, mapped to colours through the
            ``viridis`` colormap.
        title: Title drawn above the axes.
        filename: Optional output path. When omitted, a file name is derived
            from ``title`` so that plots with different titles end up in
            different files.

    Returns:
        The path of the PNG file that was written.
    """
    if filename is None:
        # A single fixed file name makes every call overwrite the previous
        # image, so callers holding several returned paths would all end up
        # pointing at the last plot. Derive a distinct name per title instead.
        slug = ''.join(
            ch.lower() if ch.isalnum() else '_' for ch in title
        ).strip('_')
        filename = 'plot_' + slug + '.png' if slug else 'plot.png'

    # Use an explicit Figure/Axes pair rather than pyplot's implicit
    # "current figure" so the drawing does not depend on global state.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        points = ax.scatter(data['PC1'], data['PC2'], c=labels, cmap='viridis', s=50)
        ax.set_title(title)
        ax.set_xlabel('Principal Component 1 (PC1)')
        ax.set_ylabel('Principal Component 2 (PC2)')
        fig.colorbar(points, ax=ax)
        fig.savefig(filename)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    return filename
# Process the uploaded data
def process_data(file):
    """Cluster an uploaded CSV with the pre-loaded models and plot each result.

    The data is standardized with ``scaler``, projected with ``pca`` and then
    labelled by K-means, hierarchical clustering (cut into at most 3 clusters)
    and DBSCAN. One scatter plot per method is produced via ``plot_clusters``.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
            A ``'Time'`` column, if present, is dropped before scaling.

    Returns:
        A 6-tuple ``(kmeans_labels, hclust_labels, dbscan_labels,
        kmeans_plot, hclust_plot, dbscan_plot)`` where the last three items
        are the values returned by ``plot_clusters``.
    """
    # Imported here because only the fallback below needs it.
    from scipy.cluster.hierarchy import linkage

    # Read the new data
    new_data = pd.read_csv(file)

    # Drop the 'Time' column; errors='ignore' lets files without it through
    # instead of failing with a KeyError before the models are even applied.
    new_numerical_data = new_data.drop(columns=['Time'], errors='ignore')

    # Pre-processing with the already-fitted transformers
    scaled_new_data = scaler.transform(new_numerical_data)  # standardize
    pca_new_data = pca.transform(scaled_new_data)  # project with the saved PCA

    # DataFrame holding the principal components
    # (assumes the saved PCA yields exactly two components)
    pca_new_df = pd.DataFrame(pca_new_data, columns=['PC1', 'PC2'])

    # Cluster with the loaded models
    kmeans_new_labels = kmeans.predict(pca_new_df)

    # fcluster() only cuts an existing linkage; it returns one label per
    # observation that the linkage was built from, not per uploaded row.
    # A linkage over n observations has n - 1 rows, so reuse the saved one
    # only when it matches the upload; otherwise build a linkage from the
    # uploaded data so the labels line up with the plotted points.
    # NOTE(review): Ward linkage is assumed for the fallback - confirm it
    # matches how 'hierarchical_model.sav' was produced.
    if len(linked) + 1 == len(pca_new_df):
        linkage_matrix = linked
    else:
        linkage_matrix = linkage(pca_new_df, method='ward')
    hclust_new_labels = fcluster(linkage_matrix, 3, criterion='maxclust')

    # fit_predict re-fits the loaded DBSCAN estimator on the uploaded data.
    dbscan_new_labels = dbscan.fit_predict(pca_new_df)

    # Visualize the results
    kmeans_plot = plot_clusters(pca_new_df, kmeans_new_labels, 'K-means Clustering')
    hclust_plot = plot_clusters(pca_new_df, hclust_new_labels, 'Hierarchical Clustering')
    dbscan_plot = plot_clusters(pca_new_df, dbscan_new_labels, 'DBSCAN Clustering')

    return (
        kmeans_new_labels,
        hclust_new_labels,
        dbscan_new_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    )
# Streamlit application
st.title("聚類模型應用")

# File upload widget (CSV only)
uploaded_file = st.file_uploader("上傳 CSV 檔案", type=["csv"])

if uploaded_file is not None:
    (
        kmeans_labels,
        hclust_labels,
        dbscan_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    ) = process_data(uploaded_file)

    # Show the label arrays: K-means, hierarchical, then DBSCAN
    for heading, cluster_labels in (
        ("K-means Labels", kmeans_labels),
        ("Hierarchical Clustering Labels", hclust_labels),
        ("DBSCAN Labels", dbscan_labels),
    ):
        st.subheader(heading)
        st.text(cluster_labels)

    # Show the plots in the same order
    for heading, plot_path in (
        ("K-means Clustering Plot", kmeans_plot),
        ("Hierarchical Clustering Plot", hclust_plot),
        ("DBSCAN Clustering Plot", dbscan_plot),
    ):
        st.subheader(heading)
        st.image(plot_path)