Upload 3_DataGen.py
Browse files- pages/3_DataGen.py +69 -0
pages/3_DataGen.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Streamlit page: random data generator.
#
# The sidebar selects a distribution (Uniform, Bernoulli, Gaussian or
# Discrete), the number of samples and the distribution parameters. The page
# then draws the samples with NumPy, shows a histogram and the raw values,
# and offers the values as a CSV download.
#
# Invalid parameter combinations (lower bound above upper bound, negative
# standard deviation) are rejected in the UI instead of letting NumPy raise.
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st

# Title of the App
st.title("Random Data Generator")

# Sidebar for User Inputs
st.sidebar.header("Settings")

# Distribution Selector
distribution = st.sidebar.selectbox(
    "Select a Distribution",
    ["Uniform", "Bernoulli", "Gaussian", "Discrete"],
)

# Number of Data Points
n_points = st.sidebar.number_input(
    "Number of Data Points",
    min_value=1,
    max_value=1000000,
    value=1000,
    step=1000,
)

# Parameters for Distributions
if distribution == "Uniform":
    low = st.sidebar.number_input("Lower Bound", value=0.0)
    high = st.sidebar.number_input("Upper Bound", value=1.0)
    # NumPy leaves the result undefined when high < low, so refuse it here.
    if low > high:
        st.sidebar.error("Lower Bound must not be greater than Upper Bound.")
        st.stop()
    data = np.random.uniform(low, high, n_points)

elif distribution == "Bernoulli":
    p = st.sidebar.slider(
        "Probability (p)", min_value=0.0, max_value=1.0, value=0.5
    )
    # A Bernoulli trial is a binomial draw with a single trial.
    data = np.random.binomial(1, p, n_points)

elif distribution == "Gaussian":
    mean = st.sidebar.number_input("Mean", value=0.0)
    # np.random.normal raises ValueError for a negative scale, so the widget
    # itself forbids negative values.
    std_dev = st.sidebar.number_input(
        "Standard Deviation", min_value=0.0, value=1.0
    )
    data = np.random.normal(mean, std_dev, n_points)

elif distribution == "Discrete":
    low = st.sidebar.number_input("Lower Bound", value=0)
    high = st.sidebar.number_input("Upper Bound", value=10)
    # np.random.randint raises ValueError when the range is empty.
    if low > high:
        st.sidebar.error("Lower Bound must not be greater than Upper Bound.")
        st.stop()
    # randint's upper bound is exclusive; +1 makes "Upper Bound" inclusive.
    data = np.random.randint(low, high + 1, n_points)


# Visualization
st.write("### Visualization")
fig, ax = plt.subplots()
ax.hist(data, bins=30, alpha=0.75, color="skyblue", edgecolor="black")
ax.set_title(f"Histogram of {distribution} Distribution")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
st.pyplot(fig)
# The script reruns on every widget interaction; close the figure so pyplot
# does not keep one open figure per rerun.
plt.close(fig)


# Display the Generated Data
st.write(f"### {distribution} Distribution - {n_points} Points")
df = pd.DataFrame(data, columns=["Value"])
st.dataframe(df)

with st.sidebar:
    # Download the Data
    st.write("### Download Data")
    csv = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=f"{distribution}_data.csv",
        mime="text/csv",
    )