# Page-capture header (not part of the program), kept as a comment:
#   eaglelandsonce's picture
#   Update app.py
#   120bed9 verified
import streamlit as st
import pandas as pd
import numpy as np
# Pin the seed of NumPy's global random generator so that repeated runs
# draw the same sequence of synthetic values.
np.random.seed(seed=42)
# Column order of the DataFrame returned by generate_realistic_data.
_PATIENT_COLUMNS = (
    'Patient ID',
    'Age',
    'Menopausal Status',
    'Tumor Size (cm)',
    'Lymph Node Involvement',
    'Tumor Grade',
    'Tumor Stage',
    'ER Status',
    'PR Status',
    'HER2 Status',
    'Ki-67 Level',
    'TNBC Status',
    'BRCA Mutation',
    'Overall Health',
    'Genomic Recurrence Score',
    'Treatment',
)


def _stage_from_size_and_nodes(tumor_size, lymph_node):
    """Return stage 'I', 'II' or 'III' from tumor size (cm) and node status."""
    if lymph_node == 'Positive' or tumor_size > 5.0:
        return 'III'
    return 'I' if tumor_size <= 2.0 else 'II'


def _select_treatment(stage, er, her2, tnbc, brca, recurrence_score, health):
    """Return the treatment description string for one patient profile."""
    if stage in ('I', 'II'):
        if tnbc == 'Positive':
            plan = 'Surgery, Chemotherapy, and Radiation Therapy'
            if brca == 'Positive':
                plan += ', plus PARP Inhibitors'
            return plan
        if er == 'Positive' and recurrence_score != 'N/A':
            if recurrence_score == 'High':
                return 'Surgery, Chemotherapy, Hormone Therapy, and Radiation Therapy'
            if recurrence_score == 'Intermediate':
                return 'Surgery, Consider Chemotherapy, Hormone Therapy, and Radiation Therapy'
            return 'Surgery, Hormone Therapy, and Radiation Therapy'
        if her2 == 'Positive':
            return 'Surgery, HER2-Targeted Therapy, Chemotherapy, and Radiation Therapy'
        return 'Surgery, Chemotherapy, and Radiation Therapy'

    if stage == 'III':
        plan = 'Neoadjuvant Chemotherapy, Surgery, Radiation Therapy'
        if her2 == 'Positive':
            plan += ', HER2-Targeted Therapy'
        if er == 'Positive':
            plan += ', Hormone Therapy'
        return plan

    # Stage IV: systemic therapy is only listed for patients in good health.
    if health != 'Good':
        return 'Palliative Care Only'
    options = []
    if er == 'Positive':
        options.append('Hormone Therapy')
    if her2 == 'Positive':
        options.append('HER2-Targeted Therapy')
    if tnbc == 'Positive':
        options.append('Chemotherapy')
    return 'Systemic Therapy (' + ', '.join(options) + '), Palliative Care'


def generate_realistic_data(num_patients=100):
    """Generate a table of synthetic breast cancer patients.

    Values are drawn from NumPy's global random generator (``np.random``),
    so the output depends on that generator's current state.

    Args:
        num_patients: Number of patient rows to generate. Patient IDs run
            from 1 to ``num_patients``.

    Returns:
        A ``pandas.DataFrame`` with one row per patient and the columns
        listed in ``_PATIENT_COLUMNS``.
    """
    records = []
    for patient_id in range(1, num_patients + 1):
        # Age: normal(60, 10), truncated to an integer and clamped to 30-80.
        age = int(np.random.normal(60, 10))
        age = max(30, min(age, 80))
        menopausal = 'Post-menopausal' if age >= 50 else 'Pre-menopausal'

        # Tumor size in cm: log-normal, rounded to two decimals.
        tumor_size = round(np.random.lognormal(mean=0.7, sigma=0.5), 2)

        # Lymph node involvement: 60% for tumors over 2 cm, otherwise 30%.
        node_probability = 0.6 if tumor_size > 2.0 else 0.3
        lymph_node = 'Positive' if np.random.rand() < node_probability else 'Negative'

        # Tumor grade (1-3): higher grades are more likely for larger tumors.
        grade_weights = [0.1, 0.4, 0.5] if tumor_size > 2.0 else [0.3, 0.5, 0.2]
        grade = np.random.choice([1, 2, 3], p=grade_weights)

        # Stage I-III from size and nodes; 5% of patients are overridden to IV.
        stage = _stage_from_size_and_nodes(tumor_size, lymph_node)
        if np.random.rand() < 0.05:
            stage = 'IV'

        # Hormone receptors: PR can only be positive when ER is positive.
        er = np.random.choice(['Positive', 'Negative'], p=[0.75, 0.25])
        pr = 'Positive' if er == 'Positive' and np.random.rand() > 0.1 else 'Negative'

        # HER2: more often positive in grade 3 tumors.
        her2_weights = [0.3, 0.7] if grade == 3 else [0.15, 0.85]
        her2 = np.random.choice(['Positive', 'Negative'], p=her2_weights)

        # Ki-67: 'High' only for grade 3 tumors, with probability 0.8.
        ki67 = 'High' if grade == 3 and np.random.rand() < 0.8 else 'Low'

        # Triple-negative: ER, PR and HER2 all negative.
        all_negative = er == 'Negative' and pr == 'Negative' and her2 == 'Negative'
        tnbc = 'Positive' if all_negative else 'Negative'

        # BRCA mutation: 20% chance for TNBC patients or patients under 40.
        # The parentheses matter: without them `and` binds tighter than `or`
        # and every TNBC patient would be marked BRCA-positive.
        at_risk = tnbc == 'Positive' or age < 40
        brca = 'Positive' if at_risk and np.random.rand() < 0.2 else 'Negative'

        # Overall health: 'Good' with probability 0.9 under 65, else 'Poor'.
        health = 'Good' if age < 65 and np.random.rand() < 0.9 else 'Poor'

        # Genomic recurrence score: only drawn for ER+, HER2- patients.
        if er == 'Positive' and her2 == 'Negative':
            recurrence_score = np.random.choice(
                ['Low', 'Intermediate', 'High'], p=[0.6, 0.3, 0.1]
            )
        else:
            recurrence_score = 'N/A'

        treatment = _select_treatment(
            stage, er, her2, tnbc, brca, recurrence_score, health
        )

        records.append({
            'Patient ID': patient_id,
            'Age': age,
            'Menopausal Status': menopausal,
            'Tumor Size (cm)': tumor_size,
            'Lymph Node Involvement': lymph_node,
            'Tumor Grade': grade,
            'Tumor Stage': stage,
            'ER Status': er,
            'PR Status': pr,
            'HER2 Status': her2,
            'Ki-67 Level': ki67,
            'TNBC Status': tnbc,
            'BRCA Mutation': brca,
            'Overall Health': health,
            'Genomic Recurrence Score': recurrence_score,
            'Treatment': treatment,
        })

    # Passing the column list keeps the column order fixed, including when
    # no rows were generated.
    return pd.DataFrame(records, columns=list(_PATIENT_COLUMNS))
def main():
    """Render the Streamlit page for generating and downloading the data.

    The generated DataFrame is kept in ``st.session_state`` so that the table
    and both download buttons are still rendered on the script rerun that
    follows a click on a download button or a change to another widget.
    """
    st.title('Synthetic Breast Cancer Patient Data Generator')
    st.write('This app generates synthetic breast cancer patient data based on NCCN guidelines.')

    # User inputs
    num_patients = st.number_input(
        'Number of Patients to Generate',
        min_value=10,
        max_value=10000,
        value=100,
        step=10,
    )

    # st.button is True only on the rerun triggered by its own click, so the
    # result is stored rather than rendered inside this branch.
    if st.button('Generate Data'):
        st.session_state['generated_df'] = generate_realistic_data(num_patients=num_patients)

    if 'generated_df' not in st.session_state:
        return

    df = st.session_state['generated_df']
    # Report the size of the stored table; the input widget may have been
    # changed since the data was generated.
    st.success(f'Generated data for {len(df)} patients.')

    # Display DataFrame
    st.dataframe(df)

    # Provide download link for data with Treatment column
    csv_with_treatment = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download data as CSV with Treatment",
        data=csv_with_treatment,
        file_name='synthetic_breast_cancer_data_with_treatment.csv',
        mime='text/csv',
    )

    # Provide download link for data with Treatment column renamed to CheckTreatment
    df_check_treatment = df.rename(columns={'Treatment': 'CheckTreatment'})
    csv_check_treatment = df_check_treatment.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download data as CSV with CheckTreatment",
        data=csv_check_treatment,
        file_name='synthetic_breast_cancer_data_with_check_treatment.csv',
        mime='text/csv',
    )
# Start the app only when this file is run as the entry script.
if __name__ == "__main__":
    main()