eaglelandsonce committed on
Commit
cbd4340
·
verified ·
1 Parent(s): d202149

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -76
app.py CHANGED
@@ -12,40 +12,128 @@ np.random.seed(42)
12
  # Function to generate synthetic BreastCancer data
13
  def generate_breast_cancer_data(num_patients):
14
  primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
15
- ages = np.random.randint(30, 80, size=num_patients)
16
- menopausal_status = ["Post-menopausal" if age >= 50 else "Pre-menopausal" for age in ages]
17
- tumor_sizes = np.round(np.random.lognormal(mean=0.7, sigma=0.5, size=num_patients), 2)
18
- lymph_nodes = np.random.choice(["Positive", "Negative"], size=num_patients, p=[0.4, 0.6])
19
- tumor_grades = np.random.choice([1, 2, 3], size=num_patients, p=[0.3, 0.5, 0.2])
20
- tumor_stages = np.random.choice(["I", "II", "III", "IV"], size=num_patients, p=[0.4, 0.3, 0.2, 0.1])
21
- er_status = np.random.choice(["Positive", "Negative"], size=num_patients, p=[0.75, 0.25])
22
- pr_status = np.random.choice(["Positive", "Negative"], size=num_patients, p=[0.7, 0.3])
23
- her2_status = np.random.choice(["Positive", "Negative"], size=num_patients, p=[0.3, 0.7])
24
- ki67_levels = np.random.choice(["High", "Low"], size=num_patients, p=[0.6, 0.4])
25
- tnbc_status = ["Positive" if er == "Negative" and pr == "Negative" and her2 == "Negative" else "Negative" for er, pr, her2 in zip(er_status, pr_status, her2_status)]
26
- brca_mutation = np.random.choice(["Positive", "Negative"], size=num_patients, p=[0.1, 0.9])
27
- overall_health = np.random.choice(["Good", "Poor"], size=num_patients, p=[0.7, 0.3])
28
- genomic_score = np.random.choice(["Low", "Intermediate", "High", "N/A"], size=num_patients, p=[0.3, 0.2, 0.1, 0.4])
29
- treatments = np.random.choice(["Surgery", "Chemotherapy", "Radiation Therapy"], size=num_patients)
30
-
31
- return pd.DataFrame({
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "PRIMARY_PERSON_KEY": primary_keys,
33
  "Age": ages,
34
  "Menopausal Status": menopausal_status,
35
  "Tumor Size (cm)": tumor_sizes,
36
  "Lymph Node Involvement": lymph_nodes,
37
- "Tumor Grade": tumor_grades,
38
- "Tumor Stage": tumor_stages,
39
  "ER Status": er_status,
40
  "PR Status": pr_status,
41
  "HER2 Status": her2_status,
42
- "Ki-67 Level": ki67_levels,
43
  "TNBC Status": tnbc_status,
44
  "BRCA Mutation": brca_mutation,
45
  "Overall Health": overall_health,
46
  "Genomic Recurrence Score": genomic_score,
47
- "Treatment": treatments
48
- })
 
 
49
 
50
  # Function to generate Members from BreastCancer
51
  def generate_members_from_breast_cancer(breast_cancer_df):
@@ -69,7 +157,7 @@ def generate_enrollments_from_breast_cancer(breast_cancer_df):
69
  "RELATION": np.random.choice(["SUBSCRIBER", "DEPENDENT"], len(breast_cancer_df)),
70
  })
71
 
72
- # Function to generate Services from BreastCancer
73
  def generate_services(num_services, primary_keys):
74
  return pd.DataFrame({
75
  "PRIMARY_PERSON_KEY": np.random.choice(primary_keys, num_services),
@@ -126,56 +214,4 @@ def generate_wearable_data(num_patients, num_measurements, start_datetime, time_
126
  heart_rate = max(heart_rate, 50)
127
  o2_sat = max(o2_sat, 90.0)
128
 
129
- data_rows.append([pkey, ts.strftime("%Y-%m-%d %H:%M:%S"), activity, heart_rate, round(o2_sat, 1)])
130
-
131
- return pd.DataFrame(data_rows, columns=["PRIMARY_PERSON_KEY", "Measurement_Timestamp", "Activity_Level", "Heart_Rate", "O2_Saturation"])
132
-
133
# Main Streamlit App
st.title("Synthetic Medical Data Generator with Wearable Data")

# Dataset size controls
num_patients = st.slider("Number of Breast Cancer Patients to Generate", 10, 1000, 100)
num_measurements = st.slider("Measurements per Patient (Wearable Data)", 1, 100, 10)
num_services = st.slider("Number of Services to Generate", 10, 2000, 500)
num_providers = st.slider("Number of Providers to Generate", 10, 500, 100)

# Wearable-data controls: first timestamp plus the two percentage knobs
start_date = st.date_input("Wearable Data Start Date", value=datetime(2024, 12, 1))
start_time = st.time_input("Wearable Data Start Time", value=datetime(2024, 12, 1, 8, 0).time())
cancer_rate = st.slider("Percentage of Patients with Cancer (Wearable Data)", 0, 100, 30)
chemo_brain_effect = st.slider("Chemo Brain Impact on Activity Level (in % reduction)", 0, 50, 20)

if st.button("Generate Data"):
    # The same PPK_xxxxx keys are handed to the services and wearable generators.
    primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
    wearable_start_datetime = datetime.combine(start_date, start_time)

    # Generate every table first, then render them in a fixed order.
    breast_cancer_df = generate_breast_cancer_data(num_patients)
    members_df = generate_members_from_breast_cancer(breast_cancer_df)
    enrollments_df = generate_enrollments_from_breast_cancer(breast_cancer_df)
    services_df = generate_services(num_services, primary_keys)
    providers_df = generate_providers(num_providers)
    wearable_data = generate_wearable_data(
        num_patients,
        num_measurements,
        wearable_start_datetime,
        timedelta(hours=1),
        cancer_rate,
        chemo_brain_effect,
        primary_keys,
    )

    # (section title, table, CSV file name) for each preview/download section.
    sections = [
        ("Breast Cancer Data", breast_cancer_df, "breast_cancer.csv"),
        ("Members Data", members_df, "members.csv"),
        ("Enrollments Data", enrollments_df, "enrollments.csv"),
        ("Services Data", services_df, "services.csv"),
        ("Providers Data", providers_df, "providers.csv"),
        ("Wearable Data", wearable_data, "wearable_data.csv"),
    ]
    for section_title, frame, csv_name in sections:
        st.subheader(section_title)
        # Preview only the first rows; the download carries the full table.
        st.dataframe(frame.head())
        st.download_button(f"Download {section_title}", frame.to_csv(index=False), csv_name)
 
12
  # Function to generate synthetic BreastCancer data
13
  def generate_breast_cancer_data(num_patients):
14
  primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
15
+
16
+ ages = []
17
+ menopausal_status = []
18
+ tumor_sizes = []
19
+ lymph_nodes = []
20
+ grades = []
21
+ stages = []
22
+ er_status = []
23
+ pr_status = []
24
+ her2_status = []
25
+ ki67_level = []
26
+ tnbc_status = []
27
+ brca_mutation = []
28
+ overall_health = []
29
+ genomic_score = []
30
+ treatment = []
31
+
32
+ for i in range(num_patients):
33
+ age = int(np.random.normal(60, 10))
34
+ age = max(30, min(age, 80))
35
+ ages.append(age)
36
+
37
+ menopausal = "Post-menopausal" if age >= 50 else "Pre-menopausal"
38
+ menopausal_status.append(menopausal)
39
+
40
+ tumor_size = round(np.random.lognormal(mean=0.7, sigma=0.5), 2)
41
+ tumor_sizes.append(tumor_size)
42
+
43
+ lymph_node = (
44
+ "Positive"
45
+ if (tumor_size > 2.0 and np.random.rand() < 0.6)
46
+ or (tumor_size <= 2.0 and np.random.rand() < 0.3)
47
+ else "Negative"
48
+ )
49
+ lymph_nodes.append(lymph_node)
50
+
51
+ grade = np.random.choice([1, 2, 3], p=[0.1, 0.4, 0.5] if tumor_size > 2.0 else [0.3, 0.5, 0.2])
52
+ grades.append(grade)
53
+
54
+ if tumor_size <= 2.0 and lymph_node == "Negative":
55
+ stage = "I"
56
+ elif (tumor_size > 2.0 and tumor_size <= 5.0) and lymph_node == "Negative":
57
+ stage = "II"
58
+ elif lymph_node == "Positive" or tumor_size > 5.0:
59
+ stage = "III"
60
+ else:
61
+ stage = "II"
62
+ if np.random.rand() < 0.05:
63
+ stage = "IV"
64
+ stages.append(stage)
65
+
66
+ er = np.random.choice(["Positive", "Negative"], p=[0.75, 0.25])
67
+ pr = "Positive" if er == "Positive" and np.random.rand() > 0.1 else "Negative"
68
+ er_status.append(er)
69
+ pr_status.append(pr)
70
+
71
+ her2 = np.random.choice(["Positive", "Negative"], p=[0.3, 0.7] if grade == 3 else [0.15, 0.85])
72
+ her2_status.append(her2)
73
+
74
+ ki67 = "High" if grade == 3 and np.random.rand() < 0.8 else "Low"
75
+ ki67_level.append(ki67)
76
+
77
+ tnbc = "Positive" if er == "Negative" and pr == "Negative" and her2 == "Negative" else "Negative"
78
+ tnbc_status.append(tnbc)
79
+
80
+ brca = "Positive" if (tnbc == "Positive" or age < 40) and np.random.rand() < 0.2 else "Negative"
81
+ brca_mutation.append(brca)
82
+
83
+ health = "Good" if age < 65 and np.random.rand() < 0.9 else "Poor"
84
+ overall_health.append(health)
85
+
86
+ recurrence_score = (
87
+ np.random.choice(["Low", "Intermediate", "High"], p=[0.6, 0.3, 0.1])
88
+ if er == "Positive" and her2 == "Negative"
89
+ else "N/A"
90
+ )
91
+ genomic_score.append(recurrence_score)
92
+
93
+ if stage in ["I", "II"]:
94
+ if tnbc == "Positive":
95
+ treat = "Surgery, Chemotherapy, and Radiation Therapy"
96
+ elif er == "Positive" and recurrence_score != "N/A":
97
+ if recurrence_score == "High":
98
+ treat = "Surgery, Chemotherapy, Hormone Therapy, and Radiation Therapy"
99
+ elif recurrence_score == "Intermediate":
100
+ treat = "Surgery, Consider Chemotherapy, Hormone Therapy, and Radiation Therapy"
101
+ else:
102
+ treat = "Surgery, Hormone Therapy, and Radiation Therapy"
103
+ elif her2 == "Positive":
104
+ treat = "Surgery, HER2-Targeted Therapy, Chemotherapy, and Radiation Therapy"
105
+ else:
106
+ treat = "Surgery, Chemotherapy, and Radiation Therapy"
107
+ elif stage == "III":
108
+ treat = (
109
+ "Neoadjuvant Chemotherapy, Surgery, Radiation Therapy"
110
+ + (", HER2-Targeted Therapy" if her2 == "Positive" else "")
111
+ + (", Hormone Therapy" if er == "Positive" else "")
112
+ )
113
+ else:
114
+ treat = "Systemic Therapy (Palliative Care)"
115
+ treatment.append(treat)
116
+
117
+ breast_cancer_data = {
118
  "PRIMARY_PERSON_KEY": primary_keys,
119
  "Age": ages,
120
  "Menopausal Status": menopausal_status,
121
  "Tumor Size (cm)": tumor_sizes,
122
  "Lymph Node Involvement": lymph_nodes,
123
+ "Tumor Grade": grades,
124
+ "Tumor Stage": stages,
125
  "ER Status": er_status,
126
  "PR Status": pr_status,
127
  "HER2 Status": her2_status,
128
+ "Ki-67 Level": ki67_level,
129
  "TNBC Status": tnbc_status,
130
  "BRCA Mutation": brca_mutation,
131
  "Overall Health": overall_health,
132
  "Genomic Recurrence Score": genomic_score,
133
+ "Treatment": treatment,
134
+ }
135
+
136
+ return pd.DataFrame(breast_cancer_data)
137
 
138
  # Function to generate Members from BreastCancer
139
  def generate_members_from_breast_cancer(breast_cancer_df):
 
157
  "RELATION": np.random.choice(["SUBSCRIBER", "DEPENDENT"], len(breast_cancer_df)),
158
  })
159
 
160
+ # Function to generate Services
161
  def generate_services(num_services, primary_keys):
162
  return pd.DataFrame({
163
  "PRIMARY_PERSON_KEY": np.random.choice(primary_keys, num_services),
 
214
  heart_rate = max(heart_rate, 50)
215
  o2_sat = max(o2_sat, 90.0)
216
 
217
+ data_rows.append([pkey, ts.strftime("%Y-%m-%d %H:%M