eaglelandsonce committed
Commit a2e802a · verified · 1 parent: f93ed07

Update app.py

Files changed (1): app.py (+36, -129)
app.py CHANGED
@@ -1,21 +1,5 @@
-import streamlit as st
-import pandas as pd
-import torch
-import torch.nn as nn
-import torch.optim as optim
-import matplotlib.pyplot as plt
-from sklearn.preprocessing import StandardScaler, LabelEncoder
-import numpy as np
-
-# Global scaler and label encoder for consistent preprocessing
-scaler = StandardScaler()
-label_encoder = LabelEncoder()
-feature_columns = None # To store feature columns from the training data
-model = None # Declare the model globally for predictions
-
-# Preload default files
-DEFAULT_TRAIN_FILE = "patientdata.csv"
-DEFAULT_PREDICT_FILE = "synthetic_breast_cancer_data_withColumn.csv"
+from sklearn.metrics import classification_report, confusion_matrix
+import seaborn as sns # For confusion matrix heatmap
 
 def main():
     global feature_columns, model
@@ -69,8 +53,8 @@ def main():
         st.error(f"Error during model training: {e}")
         return
 
-    # Upload data for prediction
-    st.write("Upload new data for prediction (ensure 'Treatment' column is removed if present).")
+    # Upload data for prediction and comparison
+    st.write("Upload new data for prediction and evaluation.")
     new_data_file = st.file_uploader("Upload new CSV file for prediction", type="csv")
     if new_data_file is None:
         st.write("Using default prediction data.")
@@ -86,14 +70,17 @@
             st.error(f"Error loading uploaded prediction file: {e}")
             return
 
-    # Drop 'Treatment' column if it exists
-    if 'Treatment' in new_data.columns:
-        st.warning("The 'Treatment' column is present in the prediction data and will be removed.")
-        new_data = new_data.drop(columns=['Treatment'])
-
     st.write("Prediction Dataset Preview:")
     st.dataframe(new_data.head()) # Display new data
 
+    if 'Treatment' not in new_data.columns:
+        st.error("The prediction file must contain a 'Treatment' column for evaluation.")
+        return
+
+    # Extract true labels and drop Treatment for prediction
+    true_labels = label_encoder.transform(new_data['Treatment'])
+    new_data = new_data.drop(columns=['Treatment'])
+
     if model is not None and feature_columns is not None:
         try:
             # Align columns to match training data
@@ -101,115 +88,35 @@ def main():
 
             if new_data_aligned is not None:
                 predictions = predict_treatment(new_data_aligned, model)
-
-                # Display Predictions in an Output Box
-                st.subheader("Predicted Treatment Outcomes")
-                prediction_output = "\n".join([f"Patient {i+1}: {pred}" for i, pred in enumerate(predictions)])
-                st.text_area("Prediction Results", prediction_output, height=200)
+
+                # Evaluation Metrics
+                st.subheader("Model Evaluation Metrics")
+                classification_metrics(true_labels, predictions)
+
+                # Visualize Confusion Matrix
+                confusion_mat = confusion_matrix(true_labels, predictions)
+                plot_confusion_matrix(confusion_mat, label_encoder.classes_)
             else:
                 st.error("Unable to align prediction data to the training feature columns.")
         except Exception as e:
-            st.error(f"Error during prediction: {e}")
+            st.error(f"Error during prediction or evaluation: {e}")
     else:
-        st.warning("Please train the model first before predicting on new data.")
-
-def preprocess_training_data(data):
-    global scaler, label_encoder
-
-    # Label encode the 'Treatment' target column
-    data['Treatment'] = label_encoder.fit_transform(data['Treatment'])
-    y = data['Treatment'].values
-
-    # Encode and standardize feature columns
-    X = data.drop('Treatment', axis=1)
-    feature_columns = X.columns # Store feature columns for later alignment
-    for col in X.select_dtypes(include=['object']).columns:
-        X[col] = LabelEncoder().fit_transform(X[col])
-
-    # Standardize features
-    X = scaler.fit_transform(X)
-
-    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long), X.shape[1], len(np.unique(y)), feature_columns
-
-def align_columns(new_data, feature_columns):
-    try:
-        # Ensure the new data has the same columns as the training data
-        missing_cols = set(feature_columns) - set(new_data.columns)
-        extra_cols = set(new_data.columns) - set(feature_columns)
-
-        # Remove any extra columns
-        new_data = new_data.drop(columns=extra_cols)
-
-        # Add missing columns with default value 0
-        for col in missing_cols:
-            new_data[col] = 0
-
-        # Reorder columns to match the training data
-        new_data = new_data[feature_columns]
-
-        # Encode and standardize feature columns
-        for col in new_data.select_dtypes(include=['object']).columns:
-            new_data[col] = LabelEncoder().fit_transform(new_data[col])
-
-        # Scale features
-        new_data = scaler.transform(new_data)
-
-        return torch.tensor(new_data, dtype=torch.float32)
-    except Exception as e:
-        st.error(f"Error aligning columns: {e}")
-        return None
-
-def train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs):
-    class SimpleNN(nn.Module):
-        def __init__(self, input_dim, hidden_dim, num_classes):
-            super(SimpleNN, self).__init__()
-            self.fc1 = nn.Linear(input_dim, hidden_dim)
-            self.relu = nn.ReLU()
-            self.fc2 = nn.Linear(hidden_dim, num_classes)
-
-        def forward(self, x):
-            x = self.fc1(x)
-            x = self.relu(x)
-            x = self.fc2(x)
-            return x
-
-    model = SimpleNN(input_dim, hidden_dim, num_classes)
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-
-    loss_curve = []
-    for epoch in range(epochs):
-        optimizer.zero_grad()
-        outputs = model(X)
-        loss = criterion(outputs, y)
-        loss.backward()
-        optimizer.step()
-        loss_curve.append(loss.item())
-
-    return model, loss_curve
-
-def plot_loss_curve(loss_curve):
-    plt.figure()
-    plt.plot(loss_curve, label="Training Loss")
-    plt.xlabel("Epochs")
-    plt.ylabel("Loss")
-    plt.title("Loss Curve")
-    plt.legend()
-    plt.tight_layout() # Ensure layout is tight for Streamlit
+        st.warning("Please train the model first before predicting and evaluating on new data.")
+
+def classification_metrics(true_labels, predictions):
+    # Generate classification report
+    report = classification_report(true_labels, predictions, target_names=label_encoder.classes_, output_dict=True)
+    st.write("Classification Report:")
+    st.table(pd.DataFrame(report).transpose())
+
+def plot_confusion_matrix(confusion_mat, classes):
+    # Plot confusion matrix
+    plt.figure(figsize=(8, 6))
+    sns.heatmap(confusion_mat, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes)
+    plt.xlabel("Predicted Labels")
+    plt.ylabel("True Labels")
+    plt.title("Confusion Matrix")
     st.pyplot(plt)
 
-def predict_treatment(new_data, model, batch_size=32):
-    model.eval()
-    predictions = []
-
-    with torch.no_grad():
-        for i in range(0, new_data.size(0), batch_size):
-            batch_data = new_data[i:i + batch_size]
-            outputs = model(batch_data)
-            _, batch_predictions = torch.max(outputs, 1)
-            predictions.extend(batch_predictions.numpy())
-
-    return label_encoder.inverse_transform(predictions)
-
 if __name__ == "__main__":
     main()
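
Note that the hunks above delete the imports of streamlit, pandas, torch, and matplotlib as well as the predict_treatment helper, while the added evaluation code still references plt, pd, st, and predict_treatment. A minimal, self-contained sketch of just the added metrics display, with the imports it needs and hypothetical labels standing in for real model output, might look like this:

# Minimal sketch of the evaluation flow added in this commit (not the full app).
# Assumes streamlit, pandas, matplotlib, seaborn, and scikit-learn are installed;
# true_labels and predictions are hypothetical stand-ins for real model output.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
from sklearn.metrics import classification_report, confusion_matrix

true_labels = ["Chemo", "Radiation", "Chemo", "Surgery", "Surgery"]  # hypothetical
predictions = ["Chemo", "Radiation", "Surgery", "Surgery", "Chemo"]  # hypothetical
classes = sorted(set(true_labels))

# Classification report rendered as a table, as classification_metrics() does above
report = classification_report(true_labels, predictions, output_dict=True)
st.write("Classification Report:")
st.table(pd.DataFrame(report).transpose())

# Confusion-matrix heatmap, as plot_confusion_matrix() does above; passing an
# explicit figure to st.pyplot avoids relying on matplotlib's global figure state
fig = plt.figure(figsize=(8, 6))
sns.heatmap(confusion_matrix(true_labels, predictions, labels=classes),
            annot=True, fmt="d", cmap="Blues",
            xticklabels=classes, yticklabels=classes)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
st.pyplot(fig)

Saved as, say, eval_sketch.py, this runs standalone with "streamlit run eval_sketch.py".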
 
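
One behavior worth knowing about the new true-label extraction: scikit-learn's LabelEncoder.transform raises a ValueError when given a label that was absent during fitting, so an uploaded file with an unexpected Treatment value will fall through to the generic error handler. A sketch of a guard (reusing the names from the diff; it would sit just before the transform call in main) that surfaces the problem explicitly:

# Sketch only: validate uploaded labels before calling label_encoder.transform,
# which raises ValueError on labels unseen during fit.
unseen = set(new_data['Treatment']) - set(label_encoder.classes_)
if unseen:
    st.error(f"Prediction file contains Treatment labels not seen in training: {sorted(unseen)}")
    return
true_labels = label_encoder.transform(new_data['Treatment'])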