Spaces:
Sleeping
Sleeping
eaglelandsonce
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -131,9 +131,9 @@ def generate_breast_cancer_data(num_patients):
|
|
131 |
|
132 |
return pd.DataFrame(breast_cancer_data)
|
133 |
|
134 |
-
# Function to generate
|
135 |
def generate_members_from_breast_cancer(breast_cancer_df):
|
136 |
-
|
137 |
"MEMBER_ID": breast_cancer_df["Patient ID"],
|
138 |
"PRIMARY_PERSON_KEY": breast_cancer_df["Patient ID"],
|
139 |
"MEM_GENDER": ["F"] * len(breast_cancer_df),
|
@@ -141,30 +141,37 @@ def generate_members_from_breast_cancer(breast_cancer_df):
|
|
141 |
"MEM_RACE": np.random.choice(["White", "Black", "Asian", None], len(breast_cancer_df)),
|
142 |
"MEM_STATE": np.random.choice(["MI", "HI", "CA"], len(breast_cancer_df)),
|
143 |
"MEM_ZIP3": np.random.randint(100, 999, len(breast_cancer_df)),
|
144 |
-
}
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
-
# Function to generate
|
148 |
def generate_services(num_services, primary_keys):
|
149 |
-
|
150 |
"PRIMARY_PERSON_KEY": np.random.choice(primary_keys, num_services),
|
151 |
-
"Sum of AMT_ALLOWED": np.random.uniform(1000, 10000, num_services),
|
152 |
-
"Sum of AMT_BILLED": np.random.uniform(1000, 15000, num_services),
|
153 |
-
"Count of AMT_PAID": np.random.randint(1, 5, num_services),
|
154 |
"SERVICE_SETTING": np.random.choice(["OUTPATIENT", "INPATIENT"], num_services),
|
155 |
-
|
156 |
-
|
157 |
|
158 |
# Main Streamlit App
|
159 |
st.title("Synthetic Medical Data Generator")
|
160 |
|
161 |
-
#
|
162 |
num_patients = st.slider("Number of Breast Cancer Patients to Generate", 10, 1000, 100)
|
163 |
num_services = st.slider("Number of Services to Generate", 10, 2000, 500)
|
164 |
|
165 |
if st.button("Generate Data"):
|
166 |
breast_cancer_df = generate_breast_cancer_data(num_patients)
|
167 |
members_df = generate_members_from_breast_cancer(breast_cancer_df)
|
|
|
168 |
services_df = generate_services(num_services, breast_cancer_df["Patient ID"].tolist())
|
169 |
|
170 |
# Display and download data
|
@@ -174,8 +181,12 @@ if st.button("Generate Data"):
|
|
174 |
|
175 |
st.subheader("Members Data")
|
176 |
st.dataframe(members_df.head())
|
177 |
-
st.download_button("Download Members", members_df.to_csv(index=False), "members.csv")
|
|
|
|
|
|
|
|
|
178 |
|
179 |
st.subheader("Services Data")
|
180 |
st.dataframe(services_df.head())
|
181 |
-
st.download_button("Download Services", services_df.to_csv(index=False), "services.csv")
|
|
|
131 |
|
132 |
return pd.DataFrame(breast_cancer_data)
|
133 |
|
134 |
+
# Function to generate Members from BreastCancer
|
135 |
def generate_members_from_breast_cancer(breast_cancer_df):
|
136 |
+
return pd.DataFrame({
|
137 |
"MEMBER_ID": breast_cancer_df["Patient ID"],
|
138 |
"PRIMARY_PERSON_KEY": breast_cancer_df["Patient ID"],
|
139 |
"MEM_GENDER": ["F"] * len(breast_cancer_df),
|
|
|
141 |
"MEM_RACE": np.random.choice(["White", "Black", "Asian", None], len(breast_cancer_df)),
|
142 |
"MEM_STATE": np.random.choice(["MI", "HI", "CA"], len(breast_cancer_df)),
|
143 |
"MEM_ZIP3": np.random.randint(100, 999, len(breast_cancer_df)),
|
144 |
+
})
|
145 |
+
|
146 |
+
# Function to generate Enrollments from BreastCancer
|
147 |
+
def generate_enrollments_from_breast_cancer(breast_cancer_df):
    """Build a synthetic enrollments table with one row per patient.

    Args:
        breast_cancer_df: DataFrame that has a "Patient ID" column. Its
            values are copied into PRIMARY_PERSON_KEY unchanged.

    Returns:
        A pandas DataFrame with the columns PRIMARY_PERSON_KEY, MEM_STAT,
        PAYER_LOB, PAYER_TYPE and RELATION. Every column except
        PRIMARY_PERSON_KEY is drawn independently and uniformly at random
        from a fixed list of labels, so results differ between calls unless
        the caller seeds ``np.random``.

    Raises:
        KeyError: If ``breast_cancer_df`` has no "Patient ID" column.
    """
    # Every random column needs exactly one value per patient row.
    num_rows = len(breast_cancer_df)
    return pd.DataFrame({
        "PRIMARY_PERSON_KEY": breast_cancer_df["Patient ID"],
        "MEM_STAT": np.random.choice(["ACTIVE", "INACTIVE"], num_rows),
        "PAYER_LOB": np.random.choice(
            ["MEDICAID", "COMMERCIAL", "MEDICARE"], num_rows
        ),
        "PAYER_TYPE": np.random.choice(["PPO", "HMO"], num_rows),
        "RELATION": np.random.choice(["SUBSCRIBER", "DEPENDENT"], num_rows),
    })
|
155 |
|
156 |
+
# Function to generate Services from BreastCancer
|
157 |
def generate_services(num_services, primary_keys):
    """Build a synthetic services table with ``num_services`` rows.

    Args:
        num_services: Number of service rows to generate.
        primary_keys: Sequence of person keys to sample from, with
            replacement; a key may appear on several rows or on none.

    Returns:
        A pandas DataFrame with the columns PRIMARY_PERSON_KEY,
        SERVICE_SETTING ("OUTPATIENT" or "INPATIENT") and AMOUNT_BILLED
        (a float drawn uniformly from [500, 15000)). Values are random and
        differ between calls unless the caller seeds ``np.random``.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when
            ``primary_keys`` is empty while ``num_services`` is positive.
    """
    return pd.DataFrame({
        "PRIMARY_PERSON_KEY": np.random.choice(primary_keys, num_services),
        "SERVICE_SETTING": np.random.choice(
            ["OUTPATIENT", "INPATIENT"], num_services
        ),
        "AMOUNT_BILLED": np.random.uniform(500, 15000, num_services),
    })
|
163 |
|
164 |
# Main Streamlit App
|
165 |
st.title("Synthetic Medical Data Generator")
|
166 |
|
167 |
+
# Sliders
|
168 |
num_patients = st.slider("Number of Breast Cancer Patients to Generate", 10, 1000, 100)
|
169 |
num_services = st.slider("Number of Services to Generate", 10, 2000, 500)
|
170 |
|
171 |
if st.button("Generate Data"):
|
172 |
breast_cancer_df = generate_breast_cancer_data(num_patients)
|
173 |
members_df = generate_members_from_breast_cancer(breast_cancer_df)
|
174 |
+
enrollments_df = generate_enrollments_from_breast_cancer(breast_cancer_df)
|
175 |
services_df = generate_services(num_services, breast_cancer_df["Patient ID"].tolist())
|
176 |
|
177 |
# Display and download data
|
|
|
181 |
|
182 |
st.subheader("Members Data")
|
183 |
st.dataframe(members_df.head())
|
184 |
+
st.download_button("Download Members Data", members_df.to_csv(index=False), "members.csv")
|
185 |
+
|
186 |
+
st.subheader("Enrollments Data")
|
187 |
+
st.dataframe(enrollments_df.head())
|
188 |
+
st.download_button("Download Enrollments Data", enrollments_df.to_csv(index=False), "enrollments.csv")
|
189 |
|
190 |
st.subheader("Services Data")
|
191 |
st.dataframe(services_df.head())
|
192 |
+
st.download_button("Download Services Data", services_df.to_csv(index=False), "services.csv")
|