# Space file: pages/3_DataGen.py
# Uploaded by lee-ite ("Upload 3_DataGen.py"), commit 9f27dc5 (verified)
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from io import BytesIO
# Page heading
st.title("Random Data Generator")

# Every generator setting is rendered inside the sidebar container.
with st.sidebar:
    st.header("Settings")

    # Which distribution the samples are drawn from
    distribution = st.selectbox(
        label="Select a Distribution",
        options=["Uniform", "Bernoulli", "Gaussian", "Discrete"],
    )

    # How many samples to draw (integer arguments keep the widget integer-valued)
    n_points = st.number_input(
        label="Number of Data Points",
        min_value=1,
        max_value=1000000,
        value=1000,
        step=1000,
    )
# Parameters for Distributions
# Each branch reads the parameters of the selected distribution from the
# sidebar, validates them, and draws `n_points` samples into `data`.
# Invalid parameters are reported with st.error() and the script run is halted
# with st.stop(), so the page shows a readable message instead of a traceback
# (and the later sections never run without `data` being defined).
if distribution == "Uniform":
    low = st.sidebar.number_input("Lower Bound", value=0.0)
    high = st.sidebar.number_input("Upper Bound", value=1.0)
    if low > high:
        # NumPy documents the result of uniform() as undefined when high < low.
        st.error("Lower Bound must not be greater than Upper Bound.")
        st.stop()
    data = np.random.uniform(low, high, n_points)
elif distribution == "Bernoulli":
    p = st.sidebar.slider("Probability (p)", min_value=0.0, max_value=1.0, value=0.5)
    # A Bernoulli sample is a binomial draw with a single trial.
    data = np.random.binomial(1, p, n_points)
elif distribution == "Gaussian":
    mean = st.sidebar.number_input("Mean", value=0.0)
    std_dev = st.sidebar.number_input("Standard Deviation", value=1.0)
    if std_dev < 0:
        # np.random.normal raises ValueError for a negative scale.
        st.error("Standard Deviation must not be negative.")
        st.stop()
    data = np.random.normal(mean, std_dev, n_points)
elif distribution == "Discrete":
    low = st.sidebar.number_input("Lower Bound", value=0)
    high = st.sidebar.number_input("Upper Bound", value=10)
    if low > high:
        # np.random.randint raises ValueError when its range is empty.
        st.error("Lower Bound must not be greater than Upper Bound.")
        st.stop()
    # randint's upper bound is exclusive; +1 makes the user's bound inclusive.
    data = np.random.randint(low, high + 1, n_points)
# Visualization
# Draw a 30-bin histogram of the generated samples and render it on the page.
st.write("### Visualization")
fig, ax = plt.subplots()
ax.hist(data, bins=30, alpha=0.75, color="skyblue", edgecolor="black")
ax.set_title(f"Histogram of {distribution} Distribution")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
st.pyplot(fig)
# Streamlit re-executes this script on every widget interaction, and pyplot
# keeps each figure registered until it is explicitly closed. Close it once it
# has been rendered so figures do not accumulate across reruns.
plt.close(fig)
# Show the generated samples as a single-column table
st.write(f"### {distribution} Distribution - {n_points} Points")
samples = pd.DataFrame({"Value": data})
st.dataframe(samples)

# Offer the same samples as a UTF-8 encoded CSV download from the sidebar
csv_text = samples.to_csv(index=False)
st.sidebar.write("### Download Data")
st.sidebar.download_button(
    label="Download CSV",
    data=csv_text.encode("utf-8"),
    file_name=f"{distribution}_data.csv",
    mime="text/csv",
)