init
- app.py +45 -0
- ml_algorithms/k_means.py +64 -0
- ml_algorithms/linear_regression_gradient_descent.py +172 -0
- page_config.py +27 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,45 @@
import streamlit as st

from page_config import APP_PAGE_HEADER
from ml_algorithms.linear_regression_gradient_descent import app as lrgd_app

APP_PAGE_HEADER()

with st.expander("Linear Regression using Gradient Descent"):
    lrgd_app()


def app2():
    import numpy as np
    import matplotlib.pyplot as plt

    st.write("*** Program Started ***")

    n = 50
    x = np.arange(-n / 2, n / 2, 1, dtype=np.float64)

    # random per-point slope (m) and intercept (b): a noisy linear relation
    m = np.random.uniform(0.3, 0.5, (n,))
    b = np.random.uniform(5, 10, (n,))

    y = x * m + b
    print("x", x, type(x[0]))
    print("y", y, type(y[0]))

    plt.scatter(
        x,
        y,
        s=None,
        marker="o",
        color="g",
        edgecolors="g",
        alpha=0.9,
        label="Linear Relation",
    )
    plt.grid(color="black", linestyle="--", linewidth=0.5)
    plt.legend(loc=2)
    plt.axis("scaled")
    st.pyplot(plt.gcf())  # pass the current figure; plt.show() returns None


# app2()
ml_algorithms/k_means.py
ADDED
@@ -0,0 +1,64 @@
# src: https://gist.github.com/iamaziz/ff570a6826b6d56c32b9d497a73e688c
# src: https://gist.github.com/iamaziz/0786e3de174c79839e42a5926f25acb2
def distance(u, v):
    """
    Calculates the Euclidean distance between two points
    distance = square_root( sum( (u_i - v_i)^2 ) )

    u: [float, float], point1
    v: [float, float], point2
    """
    sum_ = sum((u[i] - v[i]) ** 2 for i in range(len(u)))
    return sum_ ** (1 / 2)


def get_closer(target, *args):
    """
    Return the closest point (from the points in `args`) to target

    target: [float], target point
    *args: [[float]], list of points
    """
    min_distance = float("inf")
    closer = target
    for point in args:
        d = distance(point, target)
        if d < min_distance:
            min_distance = d
            closer = point
    return closer


def get_center(cluster):
    """
    Calculates the centroid point for `cluster`

    cluster: [[float]], list of the points in the cluster
    """
    center = []
    n = len(cluster)
    for i in range(len(cluster[0])):
        c = sum(p[i] for p in cluster) / n
        center.append(round(c, 1))
    return center


def k_means(data, k=2, *centers):
    """
    Recursive k-means algorithm

    data: [[float]], data points to consider for clustering
    k: int, number of clusters
    centers: [[float]], optional - initial centroids
    """
    centers = list(centers) if centers else [data[i] for i in range(k)]
    clusters = [[] for _ in range(k)]
    for point in data:
        nearest = get_closer(point, *centers)
        nearest_cluster_index = centers.index(nearest)
        clusters[nearest_cluster_index].append(point)

    new_centers = [get_center(cluster) for cluster in clusters]
    if centers == new_centers:
        return clusters, centers
    return k_means(data, k, *new_centers)
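A quick usage sketch (illustrative, not part of this commit): running k_means on four 2-D points. With these values the recursion converges after three passes:

data = [[1.0, 1.0], [1.5, 2.0], [8.0, 8.0], [9.0, 9.5]]
clusters, centers = k_means(data, 2)
print(clusters)  # [[[1.0, 1.0], [1.5, 2.0]], [[8.0, 8.0], [9.0, 9.5]]]
print(centers)   # [[1.2, 1.5], [8.5, 8.8]]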
ml_algorithms/linear_regression_gradient_descent.py
ADDED
@@ -0,0 +1,172 @@
# src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e


class LinearRegressionGradient:
    def __init__(self, theta=None):
        self.theta = theta
        self.loss_ = float("inf")

    def hypothesis(self, x):
        return self.theta[0] + self.theta[1] * x

    def loss(self, X, y):
        # squared error between predictions h(x_i) and targets y_i
        m = len(X)
        return sum((self.hypothesis(X[i]) - y[i]) ** 2 for i in range(m)) / (2 * m)

    def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
        m = len(X)

        for j in range(num_iter):

            # predict
            h = list(map(self.hypothesis, X))

            # compute the slope, i.e. the derivative with the current params (theta)
            deri_th0 = sum(h[i] - y[i] for i in range(m)) / m
            deri_th1 = sum((h[i] - y[i]) * X[i] for i in range(m)) / m

            # update the parameters (moving against the gradient / derivative)
            theta[0] = theta[0] - alpha * deri_th0
            theta[1] = theta[1] - alpha * deri_th1

            # report
            if j % 200 == 0:
                self.loss_ = self.loss(X, y)
                print(f"loss: {self.loss_}")


def app():
    import streamlit as st

    def header():
        st.subheader("Linear Regression using Gradient Descent")
        desc = """> Plain Python (vanilla version), i.e. without importing any library"""
        st.markdown(desc)

    header()

    st1, st2 = st.columns(2)
    with st1:
        code_math()
    with st2:
        interactive_run()

    st.markdown(
        "> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
    )


def code_math():
    import inspect
    import streamlit as st

    tex = st.latex
    write = st.write
    mark = st.markdown
    codify = lambda func: st.code(inspect.getsource(func), language="python")
    cls = LinearRegressionGradient(theta=[0, 0])

    write("The class")
    codify(cls.__init__)

    write("The hypothesis")
    tex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
    codify(cls.hypothesis)
    mark('The loss/objective/cost function to "_minimize_"')
    tex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
    codify(cls.loss)
    write("The Gradient Descent algorithm")
    mark("> repeat until convergence {")
    tex(
        r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})"""
    )
    tex(
        r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"""
    )
    mark("> }")
    codify(cls.gradientDescent)


def interactive_run():
    import streamlit as st
    import numpy as np

    mark = st.markdown
    tex = st.latex

    def random_data(n=10):
        def sample_linear_regression_dataset(n):
            # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
            from sklearn import datasets
            import matplotlib.pyplot as plt  # for plotting

            x, y, coef = datasets.make_regression(
                n_samples=n,  # number of samples
                n_features=1,  # number of features
                n_informative=1,  # number of useful features
                noise=40,  # standard deviation of the gaussian noise
                coef=True,  # also return the true coefficient used to generate the data
                random_state=0,  # same data points for each run
            )

            # Scale feature x (years of experience) to range 0..20
            # x = np.interp(x, (x.min(), x.max()), (0, 20))

            # Scale target y (salary) to range 20000..150000
            # y = np.interp(y, (y.min(), y.max()), (20000, 150000))

            plt.ion()  # interactive plot on
            plt.plot(x, y, ".", label="training data")
            plt.xlabel("Years of experience")
            plt.ylabel("Salary $")
            plt.title("Experience Vs. Salary")

            X, y = x.ravel(), y.ravel()  # flatten (n, 1) -> (n,)
            return np.around(X, 2), np.around(y, 2)

        X_, y_ = sample_linear_regression_dataset(n)
        return X_, y_

    X, y = random_data()
    theta = [0, 0]  # initial values
    model = LinearRegressionGradient(theta)
    n = st.slider("Number of samples", min_value=10, max_value=200, step=10)
    if st.button("generate new data"):
        X, y = random_data(n=n)
    mark("_Input_")
    mark(f"_X_ = {X}")
    mark(f"_y_ = {y}")
    model.gradientDescent(X, y, theta)  # run to optimize the thetas
    mark("_Solution_")
    tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print the solution
    tex(f"loss = {model.loss_}")

    # -- visualize
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.scatter(X, y, label="Linear Relation")
    y_pred = theta[0] + theta[1] * np.array(X)
    ax.plot(X, y_pred)
    ax.grid(color="black", linestyle="--", linewidth=0.5)
    ax.legend(loc=2)
    st.pyplot(fig)
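A minimal sketch (not in the commit) of driving LinearRegressionGradient directly, without the Streamlit UI; the toy data is illustrative and follows y = 1 + 2x exactly:

model = LinearRegressionGradient(theta=[0.0, 0.0])
X = [1.0, 2.0, 3.0, 4.0, 5.0]
y = [3.0, 5.0, 7.0, 9.0, 11.0]  # exact relation: y = 1 + 2x
model.gradientDescent(X, y, model.theta)  # updates theta in place
print(model.theta)  # converges toward [1.0, 2.0]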
page_config.py
ADDED
@@ -0,0 +1,27 @@
from datetime import date

import streamlit as st


def APP_PAGE_HEADER():
    st.set_page_config(
        page_title="ML Algorithms",
        page_icon=":camel:",
        layout="wide",
        initial_sidebar_state="collapsed",
    )

    hide_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        </style>
    """
    st.markdown(hide_style, unsafe_allow_html=True)
    HEADER()


def HEADER():
    today = date.today()
    st.header("_Simple ML Algorithms explained in Math & Code_")
    st.write(str(today))
requirements.txt
ADDED
@@ -0,0 +1,3 @@
streamlit
matplotlib
scikit-learn
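To try the Space locally (assuming these five files sit at the repo root as listed above), the standard Streamlit workflow should suffice:

pip install -r requirements.txt
streamlit run app.py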