Spaces:

eaglelandsonce
/

Lokahi_data

Sleeping

App Files Files Community

Lokahi_data / app.py

eaglelandsonce

Update app.py

c069834 verified about 1 month ago

raw

history blame

5.14 kB

	import streamlit as st
	import pandas as pd
	import numpy as np

	# Function to generate synthetic data
	def generate_synthetic_data(num_members, seed=None):
	    """Build four linked synthetic medical-billing tables.

	    Every table has exactly ``num_members`` rows. Enrollments, members and
	    services share the same ``MEMBER_ID`` / ``PRIMARY_PERSON_KEY`` values row
	    by row; members reuse the enrollment ``MEM_MSA_NAME`` column, and services
	    reuse the enrollment ``RELATION`` and ``YEARMO`` columns. Services
	    reference providers through ``ATT_PROV_KEY`` / ``BILL_PROV_KEY``, which
	    are sampled from the generated ``PROV_KEY`` values.

	    Args:
	        num_members: Number of rows (unique members) to generate.
	        seed: Optional integer seed. When given, a private
	            ``np.random.RandomState(seed)`` is used so the output is
	            reproducible. When ``None`` (the default) the global
	            ``np.random`` state is used, as before.

	    Returns:
	        A tuple ``(enrollments_df, members_df, providers_df, services_df)``
	        of pandas DataFrames.
	    """
	    # The legacy RandomState API mirrors the module-level functions
	    # (randint / choice / uniform), so both can be used interchangeably.
	    rng = np.random if seed is None else np.random.RandomState(seed)

	    id_range = range(1, num_members + 1)
	    unique_ids = [f"MEM_{i:05d}" for i in id_range]
	    primary_keys = [f"PPK_{i:05d}" for i in id_range]

	    # Only real calendar months are valid YYYYMM values. Drawing a plain
	    # integer between 202201 and 202412 would mostly produce impossible
	    # months such as 202213 or 202399, so sample from the explicit list of
	    # January 2022 through December 2024 instead.
	    valid_yearmo = np.array(
	        [year * 100 + month for year in (2022, 2023, 2024) for month in range(1, 13)]
	    )

	    # Synthetic Enrollments
	    enrollments_data = {
	        "MEM_AGE": rng.randint(18, 80, num_members),
	        "MEM_MSA_NAME": rng.choice(["DETROIT", "HONOLULU", "LOS ANGELES"], num_members),
	        "MEM_STAT": rng.choice(["ACTIVE", "INACTIVE"], num_members),
	        "MEMBER_ID": unique_ids,
	        "PRIMARY_PERSON_KEY": primary_keys,
	        "PAYER_LOB": rng.choice(["MEDICAID", "COMMERCIAL", "MEDICARE"], num_members),
	        "PAYER_TYPE": rng.choice(["PPO", "HMO"], num_members),
	        "PRIMARY_CHRONIC_CONDITION_ROLLUP_DESC": rng.choice(
	            ["Cancer", "Diabetes", "Hypertension"], num_members
	        ),
	        "Count of PRIMARY_CHRONIC_CONDITION_ROLLUP_ID": rng.randint(1, 5, num_members),
	        "PROD_TYPE": rng.choice(["DENTAL", "VISION", "MEDICAL"], num_members),
	        "RELATION": rng.choice(["SUBSCRIBER", "DEPENDENT"], num_members),
	        "Sum of QTY_MM_DN": rng.randint(0, 10, num_members),
	        "Sum of QTY_MM_MD": rng.randint(0, 10, num_members),
	        "Sum of QTY_MM_RX": rng.randint(0, 10, num_members),
	        "Sum of QTY_MM_VS": rng.randint(0, 10, num_members),
	        "YEARMO": rng.choice(valid_yearmo, num_members),
	    }
	    enrollments_df = pd.DataFrame(enrollments_data)

	    # Synthetic Members
	    members_data = {
	        "MEM_ETHNICITY": rng.choice(["Hispanic", "Non-Hispanic", None], num_members),
	        "MEM_GENDER": ["F"] * num_members,  # Ensuring all members are female
	        # Reuse the enrollment values so the two tables agree per member.
	        "MEM_MSA_NAME": enrollments_data["MEM_MSA_NAME"],
	        "MEM_RACE": rng.choice(["White", "Black", "Asian", None], num_members),
	        "MEM_STATE": rng.choice(["MI", "HI", "CA"], num_members),
	        "MEM_ZIP3": rng.randint(100, 999, num_members),
	        "MEMBER_ID": unique_ids,
	        "PRIMARY_PERSON_KEY": primary_keys,
	    }
	    members_df = pd.DataFrame(members_data)

	    # Synthetic Providers
	    providers_data = {
	        "PROV_CLINIC_STATE": rng.choice(["MI", "HI", "CA"], num_members),
	        "PROV_CLINIC_ZIP": rng.randint(10000, 99999, num_members),
	        "PROV_KEY": [f"PK_{i:05d}" for i in id_range],
	        "PROV_NPI_ORG": rng.randint(1, 50, num_members),
	        "PROV_TAXONOMY": rng.choice(["208100000X", "207RE0101X"], num_members),
	        "PROV_TYPE": rng.choice(["Type1", "Type2"], num_members),
	    }
	    providers_df = pd.DataFrame(providers_data)

	    # Synthetic Services
	    services_data = {
	        "MEMBER_ID": unique_ids,
	        "PRIMARY_PERSON_KEY": primary_keys,
	        "Sum of AMT_ALLOWED": rng.uniform(1000, 10000, num_members),
	        "Sum of AMT_BILLED": rng.uniform(1000, 15000, num_members),
	        "Count of AMT_PAID": rng.randint(1, 5, num_members),
	        # Provider keys are drawn from the generated providers so every
	        # service row points at an existing PROV_KEY.
	        "ATT_PROV_KEY": rng.choice(providers_data["PROV_KEY"], num_members),
	        "BILL_PROV_KEY": rng.choice(providers_data["PROV_KEY"], num_members),
	        "CLAIM_IN_NETWORK": rng.choice(["Y", "N", None], num_members),
	        "RELATION": enrollments_data["RELATION"],
	        "SERVICE_SETTING": rng.choice(["OUTPATIENT", "INPATIENT"], num_members),
	        "Sum of SERVICE_LINE": rng.randint(1, 10, num_members),
	        "Sum of SV_UNITS": rng.randint(1, 100, num_members),
	        "YEARMO": enrollments_data["YEARMO"],
	    }
	    services_df = pd.DataFrame(services_data)

	    return enrollments_df, members_df, providers_df, services_df


	# Streamlit App
	# Top-level script: renders the UI, generates the four synthetic tables,
	# previews them and offers each one as a CSV download.
	st.title("Synthetic Medical Billing Data Generator")

	# Slider for number of members
	num_members = st.slider("Select number of unique members:", min_value=10, max_value=1000, step=10, value=100)

	# Streamlit re-executes this whole script on every widget interaction,
	# including a click on a download button. Generating fresh random data on
	# each run would make the four downloaded CSVs come from four different
	# generations, so columns shared between tables would no longer agree.
	# Keep the generated frames in session state and rebuild them only when the
	# member count changes or the user explicitly asks for new data.
	regenerate = st.button("Regenerate data")
	needs_new_data = (
	    regenerate
	    or "synthetic_data" not in st.session_state
	    or st.session_state["synthetic_data"]["num_members"] != num_members
	)
	if needs_new_data:
	    st.session_state["synthetic_data"] = {
	        "num_members": num_members,
	        "frames": generate_synthetic_data(num_members),
	    }

	# Generate synthetic data
	enrollments_df, members_df, providers_df, services_df = st.session_state["synthetic_data"]["frames"]

	# (display name, dataframe) pairs, in the order they are shown.
	datasets = [
	    ("Enrollments", enrollments_df),
	    ("Members", members_df),
	    ("Providers", providers_df),
	    ("Services", services_df),
	]

	# Display dataframes
	st.subheader("Preview of Generated Data")
	for dataset_name, frame in datasets:
	    st.write(f"{dataset_name} Data")
	    st.dataframe(frame.head())

	# Allow downloading the generated files
	st.subheader("Download Synthetic Data")
	for dataset_name, frame in datasets:
	    st.download_button(
	        label=f"Download {dataset_name} Data",
	        data=frame.to_csv(index=False),
	        file_name=f"Synthetic_{dataset_name}.csv",
	        mime="text/csv",
	    )