Spaces:
Sleeping
Sleeping
eaglelandsonce
committed on
Commit
•
da1d261
1
Parent(s):
cbd4340
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
-
import csv
|
5 |
-
import io
|
6 |
import random
|
7 |
from datetime import datetime, timedelta
|
8 |
|
@@ -12,7 +10,6 @@ np.random.seed(42)
|
|
12 |
# Function to generate synthetic BreastCancer data
|
13 |
def generate_breast_cancer_data(num_patients):
|
14 |
primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
|
15 |
-
|
16 |
ages = []
|
17 |
menopausal_status = []
|
18 |
tumor_sizes = []
|
@@ -135,7 +132,7 @@ def generate_breast_cancer_data(num_patients):
|
|
135 |
|
136 |
return pd.DataFrame(breast_cancer_data)
|
137 |
|
138 |
-
# Function to generate Members
|
139 |
def generate_members_from_breast_cancer(breast_cancer_df):
|
140 |
return pd.DataFrame({
|
141 |
"MEMBER_ID": breast_cancer_df["PRIMARY_PERSON_KEY"],
|
@@ -147,7 +144,7 @@ def generate_members_from_breast_cancer(breast_cancer_df):
|
|
147 |
"MEM_ZIP3": np.random.randint(100, 999, len(breast_cancer_df)),
|
148 |
})
|
149 |
|
150 |
-
# Function to generate Enrollments
|
151 |
def generate_enrollments_from_breast_cancer(breast_cancer_df):
|
152 |
return pd.DataFrame({
|
153 |
"PRIMARY_PERSON_KEY": breast_cancer_df["PRIMARY_PERSON_KEY"],
|
@@ -181,7 +178,7 @@ def generate_providers(num_providers):
|
|
181 |
"PROV_TAXONOMY": np.random.choice(["208100000X", "207RE0101X"], num_providers),
|
182 |
})
|
183 |
|
184 |
-
# Function to generate
|
185 |
def generate_wearable_data(num_patients, num_measurements, start_datetime, time_interval, cancer_rate, chemo_brain_effect, primary_keys):
|
186 |
num_cancer_patients = int((cancer_rate / 100) * num_patients)
|
187 |
cancer_patients = set(random.sample(primary_keys, num_cancer_patients))
|
@@ -214,4 +211,58 @@ def generate_wearable_data(num_patients, num_measurements, start_datetime, time_
|
|
214 |
heart_rate = max(heart_rate, 50)
|
215 |
o2_sat = max(o2_sat, 90.0)
|
216 |
|
217 |
-
data_rows.append([
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
|
|
4 |
import random
|
5 |
from datetime import datetime, timedelta
|
6 |
|
|
|
10 |
# Function to generate synthetic BreastCancer data
|
11 |
def generate_breast_cancer_data(num_patients):
|
12 |
primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
|
|
|
13 |
ages = []
|
14 |
menopausal_status = []
|
15 |
tumor_sizes = []
|
|
|
132 |
|
133 |
return pd.DataFrame(breast_cancer_data)
|
134 |
|
135 |
+
# Function to generate Members
|
136 |
def generate_members_from_breast_cancer(breast_cancer_df):
|
137 |
return pd.DataFrame({
|
138 |
"MEMBER_ID": breast_cancer_df["PRIMARY_PERSON_KEY"],
|
|
|
144 |
"MEM_ZIP3": np.random.randint(100, 999, len(breast_cancer_df)),
|
145 |
})
|
146 |
|
147 |
+
# Function to generate Enrollments
|
148 |
def generate_enrollments_from_breast_cancer(breast_cancer_df):
|
149 |
return pd.DataFrame({
|
150 |
"PRIMARY_PERSON_KEY": breast_cancer_df["PRIMARY_PERSON_KEY"],
|
|
|
178 |
"PROV_TAXONOMY": np.random.choice(["208100000X", "207RE0101X"], num_providers),
|
179 |
})
|
180 |
|
181 |
+
# Function to generate Wearable Data
|
182 |
def generate_wearable_data(num_patients, num_measurements, start_datetime, time_interval, cancer_rate, chemo_brain_effect, primary_keys):
|
183 |
num_cancer_patients = int((cancer_rate / 100) * num_patients)
|
184 |
cancer_patients = set(random.sample(primary_keys, num_cancer_patients))
|
|
|
211 |
heart_rate = max(heart_rate, 50)
|
212 |
o2_sat = max(o2_sat, 90.0)
|
213 |
|
214 |
+
data_rows.append([
|
215 |
+
pkey,
|
216 |
+
ts.strftime("%Y-%m-%d %H:%M:%S"),
|
217 |
+
activity,
|
218 |
+
heart_rate,
|
219 |
+
round(o2_sat, 1)
|
220 |
+
])
|
221 |
+
|
222 |
+
return pd.DataFrame(data_rows, columns=["PRIMARY_PERSON_KEY", "Measurement_Timestamp", "Activity_Level", "Heart_Rate", "O2_Saturation"])
|
223 |
+
|
224 |
+
# Main Streamlit App
|
225 |
+
st.title("Synthetic Medical Data Generator with Wearable Data")
|
226 |
+
|
227 |
+
# Sliders
|
228 |
+
num_patients = st.slider("Number of Breast Cancer Patients to Generate", 10, 1000, 100)
|
229 |
+
num_measurements = st.slider("Measurements per Patient (Wearable Data)", 1, 100, 10)
|
230 |
+
num_services = st.slider("Number of Services to Generate", 10, 2000, 500)
|
231 |
+
num_providers = st.slider("Number of Providers to Generate", 10, 500, 100)
|
232 |
+
|
233 |
+
start_date = st.date_input("Wearable Data Start Date", value=datetime(2024, 12, 1))
|
234 |
+
start_time = st.time_input("Wearable Data Start Time", value=datetime(2024, 12, 1, 8, 0).time())
|
235 |
+
cancer_rate = st.slider("Percentage of Patients with Cancer (Wearable Data)", 0, 100, 30)
|
236 |
+
chemo_brain_effect = st.slider("Chemo Brain Impact on Activity Level (in % reduction)", 0, 50, 20)
|
237 |
+
|
238 |
+
if st.button("Generate Data"):
|
239 |
+
primary_keys = [f"PPK_{i+1:05d}" for i in range(num_patients)]
|
240 |
+
wearable_start_datetime = datetime.combine(start_date, start_time)
|
241 |
+
breast_cancer_df = generate_breast_cancer_data(num_patients)
|
242 |
+
members_df = generate_members_from_breast_cancer(breast_cancer_df)
|
243 |
+
enrollments_df = generate_enrollments_from_breast_cancer(breast_cancer_df)
|
244 |
+
services_df = generate_services(num_services, primary_keys)
|
245 |
+
providers_df = generate_providers(num_providers)
|
246 |
+
wearable_data = generate_wearable_data(
|
247 |
+
num_patients, num_measurements, wearable_start_datetime, timedelta(hours=1), cancer_rate, chemo_brain_effect, primary_keys
|
248 |
+
)
|
249 |
+
|
250 |
+
st.subheader("Breast Cancer Data")
|
251 |
+
st.dataframe(breast_cancer_df.head())
|
252 |
+
st.download_button("Download Breast Cancer Data", breast_cancer_df.to_csv(index=False), "breast_cancer.csv")
|
253 |
+
|
254 |
+
st.subheader("Members Data")
|
255 |
+
st.dataframe(members_df.head())
|
256 |
+
st.download_button("Download Members Data", members_df.to_csv(index=False), "members.csv")
|
257 |
+
|
258 |
+
st.subheader("Enrollments Data")
|
259 |
+
st.dataframe(enrollments_df.head())
|
260 |
+
st.download_button("Download Enrollments Data", enrollments_df.to_csv(index=False), "enrollments.csv")
|
261 |
+
|
262 |
+
st.subheader("Services Data")
|
263 |
+
st.dataframe(services_df.head())
|
264 |
+
st.download_button("Download Services Data", services_df.to_csv(index=False), "services.csv")
|
265 |
+
|
266 |
+
st.subheader("Providers Data")
|
267 |
+
st.dataframe(providers_df.head())
|
268 |
+
st.download_button("Download Providers Data", providers_df.to_csv(index=False
|