Spaces:

eaglelandsonce
/

BreastCancerModel

Sleeping

App Files Community

eaglelandsonce committed on Nov 23, 2024

Commit

39cc7e2

verified ·

1 Parent(s): dd06c5f

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -44

app.py CHANGED Viewed

@@ -12,7 +12,10 @@ scaler = StandardScaler()
 label_encoder = LabelEncoder()
 feature_columns = None  # To store feature columns from the training data
-# Streamlit App
 def main():
     global feature_columns
@@ -21,49 +24,56 @@ def main():
     # Upload training data
     uploaded_file = st.file_uploader("Upload a CSV file for training", type="csv")
-    if uploaded_file is not None:
         data = pd.read_csv(uploaded_file)
-        st.write("Training Dataset Preview:", data.head())
-        # Check for Treatment column in training data
-        if 'Treatment' not in data.columns:
-            st.error("The training data must contain a 'Treatment' column.")
-            return
-        # Prepare Data
-        X, y, input_dim, num_classes, feature_columns = preprocess_training_data(data)
-        # Model Parameters
-        hidden_dim = st.slider("Hidden Layer Dimension", 10, 100, 50)
-        learning_rate = st.number_input("Learning Rate", 0.0001, 0.1, 0.001)
-        epochs = st.number_input("Epochs", 1, 100, 20)
-        # Model training
-        if st.button("Train Model"):
-            model, loss_curve = train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs)
-            plot_loss_curve(loss_curve)
     # Upload data for prediction
     st.write("Upload new data without the 'Treatment' column for prediction.")
     new_data_file = st.file_uploader("Upload new CSV file for prediction", type="csv")
-    if new_data_file is not None:
-        if 'model' in locals() and feature_columns is not None:
-            new_data = pd.read_csv(new_data_file)
-            # Align columns to match training data
-            new_data_aligned = align_columns(new_data, feature_columns)
-            if new_data_aligned is not None:
-                predictions = predict_treatment(new_data_aligned, model)
-                # Display Predictions in an Output Box
-                st.subheader("Predicted Treatment Outcomes")
-                prediction_output = "\n".join([f"Patient {i+1}: {pred}" for i, pred in enumerate(predictions)])
-                st.text_area("Prediction Results", prediction_output, height=200)
-            else:
-                st.error("Unable to align prediction data to the training feature columns.")
         else:
-            st.error("Please train the model first before predicting on new data.")
 def preprocess_training_data(data):
     global scaler, label_encoder
@@ -149,17 +159,6 @@ def plot_loss_curve(loss_curve):
     st.pyplot(plt)
 def predict_treatment(new_data, model, batch_size=32):
-    """
-    Predict treatment outcomes for new data using the trained model.
-    Args:
-    - new_data (pd.DataFrame): The new dataset without a 'Treatment' column.
-    - model (torch.nn.Module): The trained PyTorch model.
-    - batch_size (int): Size of data batches for predictions (optional).
-    Returns:
-    - List of predicted outcomes in the original label format.
-    """
     model.eval()
     predictions = []

 label_encoder = LabelEncoder()
 feature_columns = None  # To store feature columns from the training data
+# Preload default files
+DEFAULT_TRAIN_FILE = "patientdata.csv"
+DEFAULT_PREDICT_FILE = "synthetic_breast_cancer_notreatmentcolumn.csv"
 def main():
     global feature_columns
     # Upload training data
     uploaded_file = st.file_uploader("Upload a CSV file for training", type="csv")
+    if uploaded_file is None:
+        st.write("Using default training data.")
+        data = pd.read_csv(DEFAULT_TRAIN_FILE)
+    else:
         data = pd.read_csv(uploaded_file)
+    st.write("Training Dataset Preview:", data.head())
+    # Check for Treatment column in training data
+    if 'Treatment' not in data.columns:
+        st.error("The training data must contain a 'Treatment' column.")
+        return
+    # Prepare Data
+    X, y, input_dim, num_classes, feature_columns = preprocess_training_data(data)
+    # Model Parameters
+    hidden_dim = st.slider("Hidden Layer Dimension", 10, 100, 50)
+    learning_rate = st.number_input("Learning Rate", 0.0001, 0.1, 0.001)
+    epochs = st.number_input("Epochs", 1, 100, 20)
+    # Model training
+    if st.button("Train Model"):
+        model, loss_curve = train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs)
+        plot_loss_curve(loss_curve)
     # Upload data for prediction
     st.write("Upload new data without the 'Treatment' column for prediction.")
     new_data_file = st.file_uploader("Upload new CSV file for prediction", type="csv")
+    if new_data_file is None:
+        st.write("Using default prediction data.")
+        new_data = pd.read_csv(DEFAULT_PREDICT_FILE)
+    else:
+        new_data = pd.read_csv(new_data_file)
+    st.write("Prediction Dataset Preview:", new_data.head())
+    if 'model' in locals() and feature_columns is not None:
+        # Align columns to match training data
+        new_data_aligned = align_columns(new_data, feature_columns)
+        if new_data_aligned is not None:
+            predictions = predict_treatment(new_data_aligned, model)
+            # Display Predictions in an Output Box
+            st.subheader("Predicted Treatment Outcomes")
+            prediction_output = "\n".join([f"Patient {i+1}: {pred}" for i, pred in enumerate(predictions)])
+            st.text_area("Prediction Results", prediction_output, height=200)
         else:
+            st.error("Unable to align prediction data to the training feature columns.")
+    else:
+        st.warning("Please train the model first before predicting on new data.")
 def preprocess_training_data(data):
     global scaler, label_encoder
     st.pyplot(plt)
 def predict_treatment(new_data, model, batch_size=32):
     model.eval()
     predictions = []