eaglelandsonce committed on
Commit
da1d261
1 Parent(s): cbd4340

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -7
app.py CHANGED
@@ -1,8 +1,6 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- import csv
5
- import io
6
  import random
7
  from datetime import datetime, timedelta
8
 
@@ -12,7 +10,6 @@ np.random.seed(42)
12
  # Function to generate synthetic BreastCancer data
13
  def generate_breast_cancer_data(num_patients):
14
  primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
15
-
16
  ages = []
17
  menopausal_status = []
18
  tumor_sizes = []
@@ -135,7 +132,7 @@ def generate_breast_cancer_data(num_patients):
135
 
136
  return pd.DataFrame(breast_cancer_data)
137
 
138
- # Function to generate Members from BreastCancer
139
  def generate_members_from_breast_cancer(breast_cancer_df):
140
  return pd.DataFrame({
141
  "MEMBER_ID": breast_cancer_df["PRIMARY_PERSON_KEY"],
@@ -147,7 +144,7 @@ def generate_members_from_breast_cancer(breast_cancer_df):
147
  "MEM_ZIP3": np.random.randint(100, 999, len(breast_cancer_df)),
148
  })
149
 
150
- # Function to generate Enrollments from BreastCancer
151
  def generate_enrollments_from_breast_cancer(breast_cancer_df):
152
  return pd.DataFrame({
153
  "PRIMARY_PERSON_KEY": breast_cancer_df["PRIMARY_PERSON_KEY"],
@@ -181,7 +178,7 @@ def generate_providers(num_providers):
181
  "PROV_TAXONOMY": np.random.choice(["208100000X", "207RE0101X"], num_providers),
182
  })
183
 
184
- # Function to generate wearable data
185
  def generate_wearable_data(num_patients, num_measurements, start_datetime, time_interval, cancer_rate, chemo_brain_effect, primary_keys):
186
  num_cancer_patients = int((cancer_rate / 100) * num_patients)
187
  cancer_patients = set(random.sample(primary_keys, num_cancer_patients))
@@ -214,4 +211,58 @@ def generate_wearable_data(num_patients, num_measurements, start_datetime, time_
214
  heart_rate = max(heart_rate, 50)
215
  o2_sat = max(o2_sat, 90.0)
216
 
217
- data_rows.append([pkey, ts.strftime("%Y-%m-%d %H:%M
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
 
4
  import random
5
  from datetime import datetime, timedelta
6
 
 
10
  # Function to generate synthetic BreastCancer data
11
  def generate_breast_cancer_data(num_patients):
12
  primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
 
13
  ages = []
14
  menopausal_status = []
15
  tumor_sizes = []
 
132
 
133
  return pd.DataFrame(breast_cancer_data)
134
 
135
+ # Function to generate Members
136
  def generate_members_from_breast_cancer(breast_cancer_df):
137
  return pd.DataFrame({
138
  "MEMBER_ID": breast_cancer_df["PRIMARY_PERSON_KEY"],
 
144
  "MEM_ZIP3": np.random.randint(100, 999, len(breast_cancer_df)),
145
  })
146
 
147
+ # Function to generate Enrollments
148
  def generate_enrollments_from_breast_cancer(breast_cancer_df):
149
  return pd.DataFrame({
150
  "PRIMARY_PERSON_KEY": breast_cancer_df["PRIMARY_PERSON_KEY"],
 
178
  "PROV_TAXONOMY": np.random.choice(["208100000X", "207RE0101X"], num_providers),
179
  })
180
 
181
+ # Function to generate Wearable Data
182
  def generate_wearable_data(num_patients, num_measurements, start_datetime, time_interval, cancer_rate, chemo_brain_effect, primary_keys):
183
  num_cancer_patients = int((cancer_rate / 100) * num_patients)
184
  cancer_patients = set(random.sample(primary_keys, num_cancer_patients))
 
211
  heart_rate = max(heart_rate, 50)
212
  o2_sat = max(o2_sat, 90.0)
213
 
214
+ data_rows.append([
215
+ pkey,
216
+ ts.strftime("%Y-%m-%d %H:%M:%S"),
217
+ activity,
218
+ heart_rate,
219
+ round(o2_sat, 1)
220
+ ])
221
+
222
+ return pd.DataFrame(data_rows, columns=["PRIMARY_PERSON_KEY", "Measurement_Timestamp", "Activity_Level", "Heart_Rate", "O2_Saturation"])
223
+
224
+ # Main Streamlit App
225
+ st.title("Synthetic Medical Data Generator with Wearable Data")
226
+
227
+ # Sliders
228
+ num_patients = st.slider("Number of Breast Cancer Patients to Generate", 10, 1000, 100)
229
+ num_measurements = st.slider("Measurements per Patient (Wearable Data)", 1, 100, 10)
230
+ num_services = st.slider("Number of Services to Generate", 10, 2000, 500)
231
+ num_providers = st.slider("Number of Providers to Generate", 10, 500, 100)
232
+
233
+ start_date = st.date_input("Wearable Data Start Date", value=datetime(2024, 12, 1))
234
+ start_time = st.time_input("Wearable Data Start Time", value=datetime(2024, 12, 1, 8, 0).time())
235
+ cancer_rate = st.slider("Percentage of Patients with Cancer (Wearable Data)", 0, 100, 30)
236
+ chemo_brain_effect = st.slider("Chemo Brain Impact on Activity Level (in % reduction)", 0, 50, 20)
237
+
238
+ if st.button("Generate Data"):
239
+ primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
240
+ wearable_start_datetime = datetime.combine(start_date, start_time)
241
+ breast_cancer_df = generate_breast_cancer_data(num_patients)
242
+ members_df = generate_members_from_breast_cancer(breast_cancer_df)
243
+ enrollments_df = generate_enrollments_from_breast_cancer(breast_cancer_df)
244
+ services_df = generate_services(num_services, primary_keys)
245
+ providers_df = generate_providers(num_providers)
246
+ wearable_data = generate_wearable_data(
247
+ num_patients, num_measurements, wearable_start_datetime, timedelta(hours=1), cancer_rate, chemo_brain_effect, primary_keys
248
+ )
249
+
250
+ st.subheader("Breast Cancer Data")
251
+ st.dataframe(breast_cancer_df.head())
252
+ st.download_button("Download Breast Cancer Data", breast_cancer_df.to_csv(index=False), "breast_cancer.csv")
253
+
254
+ st.subheader("Members Data")
255
+ st.dataframe(members_df.head())
256
+ st.download_button("Download Members Data", members_df.to_csv(index=False), "members.csv")
257
+
258
+ st.subheader("Enrollments Data")
259
+ st.dataframe(enrollments_df.head())
260
+ st.download_button("Download Enrollments Data", enrollments_df.to_csv(index=False), "enrollments.csv")
261
+
262
+ st.subheader("Services Data")
263
+ st.dataframe(services_df.head())
264
+ st.download_button("Download Services Data", services_df.to_csv(index=False), "services.csv")
265
+
266
+ st.subheader("Providers Data")
267
+ st.dataframe(providers_df.head())
268
+ st.download_button("Download Providers Data", providers_df.to_csv(index=False