eaglelandsonce committed on
Commit 1a06f67 · verified · parent: 18638cb

Update app.py

Files changed (1)
  1. app.py +0 -209
app.py CHANGED
@@ -218,212 +218,3 @@ if __name__ == "__main__":
     main()


- """
- import streamlit as st
- import pandas as pd
- import torch
- import torch.nn as nn
- import torch.optim as optim
- import matplotlib.pyplot as plt
- from sklearn.preprocessing import StandardScaler, LabelEncoder
- from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
- import numpy as np
-
- # Global scaler and label encoder for consistent preprocessing
- scaler = StandardScaler()
- label_encoder = LabelEncoder()
- feature_columns = None  # To store feature columns from the training data
-
- # Preload default files
- DEFAULT_TRAIN_FILE = "patientdata.csv"
- DEFAULT_PREDICT_FILE = "synthetic_breast_cancer_notreatmentcolumn.csv"
- DEFAULT_LABEL_FILE = "synthetic_breast_cancer_data_withColumn.csv"
-
- def main():
-     global feature_columns
-
-     st.title("Patient Treatment Prediction App")
-     st.write("Upload patient data to train a model and predict treatments based on input data.")
-
-     # Upload training data
-     uploaded_file = st.file_uploader("Upload a CSV file for training", type="csv")
-     if uploaded_file is None:
-         st.write("Using default training data.")
-         data = pd.read_csv(DEFAULT_TRAIN_FILE)
-     else:
-         data = pd.read_csv(uploaded_file)
-     st.write("Training Dataset Preview:", data.head())
-
-     # Check for Treatment column in training data
-     if 'Treatment' not in data.columns:
-         st.error("The training data must contain a 'Treatment' column.")
-         return
-
-     # Prepare Data
-     X, y, input_dim, num_classes, feature_columns = preprocess_training_data(data)
-
-     # Model Parameters
-     hidden_dim = st.slider("Hidden Layer Dimension", 10, 100, 50)
-     learning_rate = st.number_input("Learning Rate", 0.0001, 0.1, 0.01)
-     epochs = st.number_input("Epochs", 1, 100, 20)
-
-     # Model training
-     if st.button("Train Model"):
-         model, loss_curve = train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs)
-         plot_loss_curve(loss_curve)
-
-     # Upload data for prediction
-     st.write("Upload new data without the 'Treatment' column for prediction.")
-     new_data_file = st.file_uploader("Upload new CSV file for prediction", type="csv")
-     if new_data_file is None:
-         st.write("Using default prediction data.")
-         new_data = pd.read_csv(DEFAULT_PREDICT_FILE)
-     else:
-         new_data = pd.read_csv(new_data_file)
-     st.write("Prediction Dataset Preview:", new_data.head())
-
-     if 'model' in locals() and feature_columns is not None:
-         # Align columns to match training data
-         new_data_aligned = align_columns(new_data, feature_columns)
-
-         if new_data_aligned is not None:
-             predictions = predict_treatment(new_data_aligned, model)
-
-             # Display Predictions in an Output Box
-             st.subheader("Predicted Treatment Outcomes")
-             prediction_output = "\n".join([f"Patient {i+1}: {pred}" for i, pred in enumerate(predictions)])
-             st.text_area("Prediction Results", prediction_output, height=200)
-
-             # Compare predictions with actual labels
-             actual_data = pd.read_csv(DEFAULT_LABEL_FILE)
-             if 'Treatment' in actual_data.columns:
-                 actual_labels = label_encoder.transform(actual_data['Treatment'])
-                 evaluate_model_performance(predictions, actual_labels)
-             else:
-                 st.error("Actual labels file must contain a 'Treatment' column.")
-         else:
-             st.error("Unable to align prediction data to the training feature columns.")
-     else:
-         st.warning("Please train the model first before predicting on new data.")
-
- def preprocess_training_data(data):
-     global scaler, label_encoder
-
-     # Label encode the 'Treatment' target column
-     data['Treatment'] = label_encoder.fit_transform(data['Treatment'])
-     y = data['Treatment'].values
-
-     # Encode and standardize feature columns
-     X = data.drop('Treatment', axis=1)
-     feature_columns = X.columns  # Store feature columns for later alignment
-     for col in X.select_dtypes(include=['object']).columns:
-         X[col] = LabelEncoder().fit_transform(X[col])
-
-     # Standardize features
-     X = scaler.fit_transform(X)
-
-     return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long), X.shape[1], len(np.unique(y)), feature_columns
-
- def align_columns(new_data, feature_columns):
-     # Ensure the new data has the same columns as the training data
-     missing_cols = set(feature_columns) - set(new_data.columns)
-     extra_cols = set(new_data.columns) - set(feature_columns)
-
-     # Remove any extra columns
-     new_data = new_data.drop(columns=extra_cols)
-
-     # Add missing columns with default value 0
-     for col in missing_cols:
-         new_data[col] = 0
-
-     # Reorder columns to match the training data
-     new_data = new_data[feature_columns]
-
-     # Encode and standardize feature columns
-     for col in new_data.select_dtypes(include=['object']).columns:
-         new_data[col] = LabelEncoder().fit_transform(new_data[col])
-
-     # Scale features
-     new_data = scaler.transform(new_data)
-
-     return torch.tensor(new_data, dtype=torch.float32)
-
- def train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs):
-     # Model Definition
-     class SimpleNN(nn.Module):
-         def __init__(self, input_dim, hidden_dim, num_classes):
-             super(SimpleNN, self).__init__()
-             self.fc1 = nn.Linear(input_dim, hidden_dim)
-             self.relu = nn.ReLU()
-             self.fc2 = nn.Linear(hidden_dim, num_classes)
-
-         def forward(self, x):
-             x = self.fc1(x)
-             x = self.relu(x)
-             x = self.fc2(x)
-             return x
-
-     # Model, loss, optimizer
-     model = SimpleNN(input_dim, hidden_dim, num_classes)
-     criterion = nn.CrossEntropyLoss()
-     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-
-     # Training
-     loss_curve = []
-     for epoch in range(epochs):
-         optimizer.zero_grad()
-         outputs = model(X)
-         loss = criterion(outputs, y)
-         loss.backward()
-         optimizer.step()
-         loss_curve.append(loss.item())
-
-     return model, loss_curve
-
- def plot_loss_curve(loss_curve):
-     plt.figure()
-     plt.plot(loss_curve, label="Training Loss")
-     plt.xlabel("Epochs")
-     plt.ylabel("Loss")
-     plt.title("Loss Curve")
-     plt.legend()
-     st.pyplot(plt)
-
- def predict_treatment(new_data, model, batch_size=32):
-     model.eval()
-     predictions = []
-
-     # Run predictions in batches for large datasets
-     with torch.no_grad():
-         for i in range(0, new_data.size(0), batch_size):
-             batch_data = new_data[i:i + batch_size]
-             outputs = model(batch_data)
-             _, batch_predictions = torch.max(outputs, 1)
-             predictions.extend(batch_predictions.numpy())
-
-     # Convert numeric predictions back to original label names
-     return label_encoder.inverse_transform(predictions)
-
- def evaluate_model_performance(predictions, actual_labels):
-     # Ensure both predictions and actual_labels are consistently numeric
-     if isinstance(predictions[0], str):
-         actual_labels = label_encoder.inverse_transform(actual_labels)
-     elif isinstance(predictions[0], int):
-         actual_labels = label_encoder.transform(actual_labels)
-
-     # Calculate evaluation metrics
-     accuracy = accuracy_score(actual_labels, predictions)
-     precision = precision_score(actual_labels, predictions, average='weighted')
-     recall = recall_score(actual_labels, predictions, average='weighted')
-     f1 = f1_score(actual_labels, predictions, average='weighted')
-
-     # Display metrics
-     st.subheader("Model Evaluation Metrics")
-     st.write(f"**Accuracy:** {accuracy:.2f}")
-     st.write(f"**Precision:** {precision:.2f}")
-     st.write(f"**Recall:** {recall:.2f}")
-     st.write(f"**F1-Score:** {f1:.2f}")
-
- if __name__ == "__main__":
-     main()
- """
 
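Review note: `align_columns` also fit a fresh `LabelEncoder` on each object column of the prediction data, so a category's integer code only matches training if the new file happens to contain the same categories in the same sorted order. A sketch of one common fix, persisting the encoders fitted at training time and reusing them at predict time (`feature_encoders` is a hypothetical name, not from the original app):

    from sklearn.preprocessing import LabelEncoder

    feature_encoders = {}  # hypothetical registry: one fitted encoder per column

    def encode_features_train(X):
        # Fit and remember an encoder for every categorical column.
        for col in X.select_dtypes(include=["object"]).columns:
            feature_encoders[col] = LabelEncoder()
            X[col] = feature_encoders[col].fit_transform(X[col])
        return X

    def encode_features_predict(X):
        # Reuse the training-time mapping; transform() raises on unseen labels.
        for col in X.select_dtypes(include=["object"]).columns:
            X[col] = feature_encoders[col].transform(X[col])
        return X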