Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
# Function to generate synthetic data
def generate_synthetic_data(num_members):
    """Build four synthetic medical-billing tables with ``num_members`` rows each.

    Member IDs (``MEM_00001`` ...) and primary person keys (``PPK_00001`` ...)
    are sequential and shared by the enrollments, members and services tables.
    The members table reuses the enrollment ``MEM_MSA_NAME`` values, and the
    services table reuses the enrollment ``RELATION`` and ``YEARMO`` values.
    All random values are drawn from the global ``np.random`` state, so
    calling ``np.random.seed`` beforehand makes the output reproducible.

    Args:
        num_members: Number of rows to generate in every table.

    Returns:
        A tuple ``(enrollments_df, members_df, providers_df, services_df)``
        of pandas DataFrames.
    """
    unique_ids = [f"MEM_{i:05d}" for i in range(1, num_members + 1)]
    primary_keys = [f"PPK_{i:05d}" for i in range(1, num_members + 1)]

    # YEARMO is a YYYYMM code, not a plain integer range: sampling integers
    # between 202201 and 202412 would yield impossible months (e.g. 202213 or
    # 202300). Sample from the explicit list of valid periods instead,
    # January 2022 through December 2024 inclusive.
    valid_yearmo = [
        year * 100 + month
        for year in range(2022, 2025)
        for month in range(1, 13)
    ]

    # Synthetic Enrollments
    enrollments_data = {
        "MEM_AGE": np.random.randint(18, 80, num_members),
        "MEM_MSA_NAME": np.random.choice(["DETROIT", "HONOLULU", "LOS ANGELES"], num_members),
        "MEM_STAT": np.random.choice(["ACTIVE", "INACTIVE"], num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "PAYER_LOB": np.random.choice(["MEDICAID", "COMMERCIAL", "MEDICARE"], num_members),
        "PAYER_TYPE": np.random.choice(["PPO", "HMO"], num_members),
        "PRIMARY_CHRONIC_CONDITION_ROLLUP_DESC": np.random.choice(["Cancer", "Diabetes", "Hypertension"], num_members),
        "Count of PRIMARY_CHRONIC_CONDITION_ROLLUP_ID": np.random.randint(1, 5, num_members),
        "PROD_TYPE": np.random.choice(["DENTAL", "VISION", "MEDICAL"], num_members),
        "RELATION": np.random.choice(["SUBSCRIBER", "DEPENDENT"], num_members),
        "Sum of QTY_MM_DN": np.random.randint(0, 10, num_members),
        "Sum of QTY_MM_MD": np.random.randint(0, 10, num_members),
        "Sum of QTY_MM_RX": np.random.randint(0, 10, num_members),
        "Sum of QTY_MM_VS": np.random.randint(0, 10, num_members),
        "YEARMO": np.random.choice(valid_yearmo, num_members),
    }
    enrollments_df = pd.DataFrame(enrollments_data)

    # Synthetic Members
    members_data = {
        "MEM_ETHNICITY": np.random.choice(["Hispanic", "Non-Hispanic", None], num_members),
        "MEM_GENDER": ["F"] * num_members,  # Ensuring all members are female
        "MEM_MSA_NAME": enrollments_data["MEM_MSA_NAME"],
        "MEM_RACE": np.random.choice(["White", "Black", "Asian", None], num_members),
        "MEM_STATE": np.random.choice(["MI", "HI", "CA"], num_members),
        "MEM_ZIP3": np.random.randint(100, 999, num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
    }
    members_df = pd.DataFrame(members_data)

    # Synthetic Providers (one provider row is generated per member row)
    providers_data = {
        "PROV_CLINIC_STATE": np.random.choice(["MI", "HI", "CA"], num_members),
        "PROV_CLINIC_ZIP": np.random.randint(10000, 99999, num_members),
        "PROV_KEY": [f"PK_{i:05d}" for i in range(1, num_members + 1)],
        "PROV_NPI_ORG": np.random.randint(1, 50, num_members),
        "PROV_TAXONOMY": np.random.choice(["208100000X", "207RE0101X"], num_members),
        "PROV_TYPE": np.random.choice(["Type1", "Type2"], num_members),
    }
    providers_df = pd.DataFrame(providers_data)

    # Synthetic Services
    services_data = {
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "Sum of AMT_ALLOWED": np.random.uniform(1000, 10000, num_members),
        "Sum of AMT_BILLED": np.random.uniform(1000, 15000, num_members),
        "Count of AMT_PAID": np.random.randint(1, 5, num_members),
        # Provider keys are sampled from the generated providers so every
        # service row references a PROV_KEY that exists in providers_df.
        "ATT_PROV_KEY": np.random.choice(providers_data["PROV_KEY"], num_members),
        "BILL_PROV_KEY": np.random.choice(providers_data["PROV_KEY"], num_members),
        "CLAIM_IN_NETWORK": np.random.choice(["Y", "N", None], num_members),
        "RELATION": enrollments_data["RELATION"],
        "SERVICE_SETTING": np.random.choice(["OUTPATIENT", "INPATIENT"], num_members),
        "Sum of SERVICE_LINE": np.random.randint(1, 10, num_members),
        "Sum of SV_UNITS": np.random.randint(1, 100, num_members),
        "YEARMO": enrollments_data["YEARMO"],
    }
    services_df = pd.DataFrame(services_data)

    return enrollments_df, members_df, providers_df, services_df
# Streamlit App
st.title("Synthetic Medical Billing Data Generator")

# Slider for number of members
num_members = st.slider(
    "Select number of unique members:",
    min_value=10,
    max_value=1000,
    step=10,
    value=100,
)

# Generate synthetic data.
# Streamlit re-executes this script from the top on each widget interaction.
# Regenerating random data on every rerun would mean that CSVs downloaded one
# after another could come from different random draws and no longer agree
# with each other or with the preview. Keep the generated frames in the
# session state and only regenerate when the requested size changes.
if (
    "synthetic_data" not in st.session_state
    or st.session_state["synthetic_data"]["num_members"] != num_members
):
    st.session_state["synthetic_data"] = {
        "num_members": num_members,
        "frames": generate_synthetic_data(num_members),
    }
enrollments_df, members_df, providers_df, services_df = (
    st.session_state["synthetic_data"]["frames"]
)

# (display name, dataframe) pairs, in the order they are shown and offered.
datasets = [
    ("Enrollments", enrollments_df),
    ("Members", members_df),
    ("Providers", providers_df),
    ("Services", services_df),
]

# Display dataframes
st.subheader("Preview of Generated Data")
for dataset_name, frame in datasets:
    st.write(f"{dataset_name} Data")
    st.dataframe(frame.head())

# Allow downloading the generated files
st.subheader("Download Synthetic Data")
for dataset_name, frame in datasets:
    st.download_button(
        label=f"Download {dataset_name} Data",
        data=frame.to_csv(index=False),
        file_name=f"Synthetic_{dataset_name}.csv",
        mime="text/csv",
    )