Jensen-holm committed
Commit • 6d0453c
1 Parent(s): 6307b4f
latest update, finishing classification with MNIST! More details on
Browse files:
- app.py +114 -37
- nn/activation.py +1 -4
- nn/loss.py +30 -5
- nn/nn.py +75 -73
- nn/test.py +0 -30
- vis.py +67 -17
app.py
CHANGED
@@ -1,71 +1,117 @@
+from nn.activation import SoftMax
 import plotly.express as px
 from sklearn import datasets
+from sklearn.preprocessing import OneHotEncoder
 from sklearn.model_selection import train_test_split
 import numpy as np
 import gradio as gr
+
 import nn  # custom neural network module
+from vis import (  # classification visualization functions
+    show_digits,
+    hits_and_misses,
+    loss_history_plt,
+    make_confidence_label,
+)


+def _preprocess_digits(
     seed: int,
 ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
-    return train_test_split(
-        X,
+    digits = datasets.load_digits()
+    n_samples = len(digits.images)
+    data = digits.images.reshape((n_samples, -1))
+    y = OneHotEncoder().fit_transform(digits.target.reshape(-1, 1)).toarray()
+    X_train, X_test, y_train, y_test = train_test_split(
+        data,
         y,
         test_size=0.2,
         random_state=seed,
     )
+    return X_train, X_test, y_train, y_test


+X_train, X_test, y_train, y_test = _preprocess_digits(seed=1)


+def classification(
     Seed: int = 0,
+    Hidden_Layer_Activation: str = "Relu",
     Activation_Func: str = "SoftMax",
+    Loss_Func: str = "CrossEntropyWithLogitsLoss",
     Epochs: int = 100,
     Hidden_Size: int = 8,
+    Learning_Rate: float = 0.001,
-) -> gr.Plot:
+) -> tuple[gr.Plot, gr.Plot, gr.Label]:
+    assert Activation_Func in nn.ACTIVATIONS
+    assert Hidden_Layer_Activation in nn.ACTIVATIONS
+    assert Loss_Func in nn.LOSSES

+    classifier = nn.NN(
         epochs=Epochs,
         learning_rate=Learning_Rate,
+        hidden_activation_fn=nn.ACTIVATIONS[Hidden_Layer_Activation],
+        activation_fn=nn.ACTIVATIONS[Activation_Func],
+        loss_fn=nn.LOSSES[Loss_Func],
         hidden_size=Hidden_Size,
+        input_size=64,  # 8x8 image of pixels
+        output_size=10,  # digits 0-9
         seed=Seed,
     )
+    classifier.train(X_train=X_train, y_train=y_train)

+    pred = classifier.predict(X_test=X_test)
+    hits_and_misses_fig = hits_and_misses(y_pred=pred, y_true=y_test)
+    loss_fig = loss_history_plt(
+        loss_history=classifier._loss_history,
+        loss_fn_name=classifier.loss_fn.__class__.__name__,
     )

+    label_dict = make_confidence_label(y_pred=pred, y_test=y_test)
+    return (
+        gr.Plot(loss_fig, show_label=False),
+        gr.Plot(hits_and_misses_fig, show_label=False),
+        gr.Label(label_dict, label="Classification Confidence Rankings"),
+    )


 if __name__ == "__main__":
     with gr.Blocks() as interface:
         gr.Markdown("# Backpropagation Playground")
+        gr.Markdown(
+            """
+            ## What is this? <br>
+
+            The Backpropagation Playground is a GUI built around a neural network framework that I have built from scratch
+            in [numpy](https://numpy.org/). In this GUI, you can test different hyperparameters that will be fed to this framework and used
+            to train a neural network on the [MNIST](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) dataset of 8x8 pixel images.
+
+            ## ⚠️ PLEASE READ ⚠️
+            This application is impossibly slow on the HuggingFace CPU instance that it is running on. It is advised to clone the
+            repository and run it locally.
+
+            In order to get a decent classification score on the validation set of the MNIST data (hard coded to 20%), you will have to
+            run somewhere between 15,000 and 50,000 epochs with a learning rate around 0.001 and a hidden layer size
+            over 10 (roughly the example that I have provided). Running that many epochs with a hidden layer of that size
+            is pretty expensive on the 2 CPU cores that this space has. So if you are actually curious, you might want to clone
+            this and run it locally because it will be much, much faster.
+
+            `git clone https://huggingface.co/spaces/Jensen-holm/Backprop-Playground`
+
+            After cloning, you will have to install the dependencies from requirements.txt into your environment (venv recommended).
+
+            `pip3 install -r requirements.txt`
+
+            Then, you can run the application on localhost with the following command.
+
+            `python3 app.py`
+
+            """
+        )

+        with gr.Tab("Classification"):
             with gr.Row():
+                data_plt = show_digits()
                 gr.Plot(data_plt)

             with gr.Row():
@@ -75,34 +121,65 @@ if __name__ == "__main__":
             with gr.Row():
                 with gr.Column():
                     numeric_inputs = [
+                        gr.Slider(
+                            minimum=100, maximum=100_000, step=50, label="Epochs"
+                        ),
                         gr.Slider(
                             minimum=2, maximum=64, step=2, label="Hidden Network Size"
                         ),
                         gr.Number(minimum=0.00001, maximum=1.5, label="Learning Rate"),
                     ]
+
                 with gr.Column():
                     fn_inputs = [
                         gr.Dropdown(
+                            choices=["Relu", "Sigmoid", "TanH"],
+                            label="Hidden Layer Activation",
+                        ),
+                        gr.Dropdown(choices=["SoftMax"], label="Output Activation"),
+                        gr.Dropdown(
+                            choices=["CrossEntropy", "CrossEntropyWithLogitsLoss"],
+                            label="Loss Function",
                         ),
-                        gr.Dropdown(choices=["CrossEntropy"], label="Loss Function"),
                     ]

+            inputs = seed_input + fn_inputs + numeric_inputs
             with gr.Row():
                 train_btn = gr.Button("Train", variant="primary")

+            with gr.Row():
+                gr.Examples(
+                    examples=[
+                        [
+                            2,
+                            "Relu",
+                            "SoftMax",
+                            "CrossEntropyWithLogitsLoss",
+                            15_000,
+                            14,
+                            0.001,
+                        ]
+                    ],
+                    inputs=inputs,
+                )
+
             # outputs in row below inputs
             with gr.Row():
+                plt_outputs = [
+                    gr.Plot(label="Loss History / Epoch"),
+                    gr.Plot(label="Hits & Misses"),
+                ]
+
+            with gr.Row():
+                label_output = [gr.Label(label="Class Confidences")]

             train_btn.click(
-                outputs=plt_outputs,
+                fn=classification,
+                inputs=inputs,
+                outputs=plt_outputs + label_output,
             )

         with gr.Tab("Regression"):
+            gr.Markdown("### Coming Soon")

         interface.launch(show_error=True)
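The note above recommends roughly 15,000 to 50,000 epochs, a learning rate near 0.001, and a hidden layer size over 10. A minimal headless sketch of that configuration, using the `nn.ACTIVATIONS` / `nn.LOSSES` registries the same way app.py does; this is an illustration rather than part of the commit, and `train()` wraps its loop in `gr.Progress()`, which is normally driven from inside the Gradio app:

```python
# Hypothetical headless run of the recommended hyperparameters (sketch only).
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

import nn  # the custom neural network package from this repo

digits = datasets.load_digits()
X = digits.images.reshape((len(digits.images), -1))
y = OneHotEncoder().fit_transform(digits.target.reshape(-1, 1)).toarray()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

classifier = nn.NN(
    epochs=15_000,
    learning_rate=0.001,
    hidden_size=14,
    input_size=64,   # 8x8 pixel images, flattened
    output_size=10,  # digits 0-9
    hidden_activation_fn=nn.ACTIVATIONS["Relu"],
    activation_fn=nn.ACTIVATIONS["SoftMax"],
    loss_fn=nn.LOSSES["CrossEntropyWithLogitsLoss"],
    seed=2,
)
classifier.train(X_train=X_train, y_train=y_train)

pred = classifier.predict(X_test=X_test)
accuracy = np.mean(pred.argmax(axis=1) == y_test.argmax(axis=1))
print(f"validation accuracy: {accuracy:.3f}")
```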
nn/activation.py
CHANGED
@@ -2,9 +2,6 @@ import numpy as np
 from abc import abstractmethod, ABC


-__all__ = ["Activation", "Relu", "TanH", "Sigmoid", "SoftMax", "ACTIVATIONS"]
-
-
 class Activation(ABC):
     @abstractmethod
     def forward(self, X: np.ndarray) -> np.ndarray:
@@ -54,6 +51,6 @@ class SoftMax(Activation):
 ACTIVATIONS: dict[str, Activation] = {
     "Relu": Relu(),
     "Sigmoid": Sigmoid(),
+    "TanH": TanH(),
    "SoftMax": SoftMax(),
 }
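With `"TanH"` registered, every activation offered by the app's dropdowns can be resolved from the same `ACTIVATIONS` dictionary by name. A small lookup sketch, illustrative only and assuming the `nn.activation` module above is importable:

```python
# Look up activations by the same string keys the Gradio dropdowns use.
import numpy as np
from nn.activation import ACTIVATIONS

x = np.array([[-2.0, 0.0, 2.0]])
for name, act in ACTIVATIONS.items():
    # each Activation implements forward() (and backward() for its derivative)
    print(name, act.forward(x))
```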
nn/loss.py
CHANGED
@@ -3,9 +3,6 @@ from nn.activation import SoftMax
 import numpy as np


-__all__ = ["Loss", "MSE", "CrossEntropy", "LOSSES"]
-
-
 class Loss(ABC):
     @abstractmethod
     def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
@@ -16,6 +13,10 @@
         pass


+class LogitsLoss(Loss):
+    pass
+
+
 class MSE(Loss):
     def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
         return np.sum(np.square(y_hat - y_true)) / y_true.shape[0]
@@ -30,21 +31,45 @@ class CrossEntropy(Loss):
         y_true = np.asarray(y_true)
         m = y_true.shape[0]
         p = self._softmax(y_hat)
+        eps = 1e-15  # to prevent log(0)
+        log_likelihood = -np.log(
+            np.clip(p[range(m), y_true.argmax(axis=1)], a_min=eps, a_max=None)
+        )
         loss = np.sum(log_likelihood) / m
         return loss

     def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
         y_hat = np.asarray(y_hat)
         y_true = np.asarray(y_true)
+        grad = y_hat - y_true
+        return grad / y_true.shape[0]

     @staticmethod
     def _softmax(X: np.ndarray) -> np.ndarray:
         return SoftMax().forward(X)


+class CrossEntropyWithLogits(LogitsLoss):
+    def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
+        # Apply the log-sum-exp trick for numerical stability
+        max_logits = np.max(y_hat, axis=1, keepdims=True)
+        log_sum_exp = np.log(np.sum(np.exp(y_hat - max_logits), axis=1, keepdims=True))
+        log_probs = y_hat - max_logits - log_sum_exp
+        # Select the log probability of the true class
+        loss = -np.sum(log_probs * y_true) / y_true.shape[0]
+        return loss
+
+    def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
+        # Compute softmax probabilities
+        exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
+        probs = exps / np.sum(exps, axis=1, keepdims=True)
+        # Subtract the one-hot encoded labels from the probabilities
+        grad = (probs - y_true) / y_true.shape[0]
+        return grad
+
+
 LOSSES: dict[str, Loss] = {
     "MSE": MSE(),
     "CrossEntropy": CrossEntropy(),
+    "CrossEntropyWithLogitsLoss": CrossEntropyWithLogits(),
 }
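`CrossEntropyWithLogits` folds the softmax into the loss: `forward` uses the log-sum-exp trick on raw logits, and `backward` reduces to `(softmax(y_hat) - y_true) / m`. A quick finite-difference check of that gradient, an illustrative sketch assuming the `nn.loss` module above is importable:

```python
# Compare the analytic gradient of CrossEntropyWithLogits against a
# finite-difference estimate on random logits (sketch, not part of the commit).
import numpy as np
from nn.loss import LOSSES

rng = np.random.default_rng(0)
y_hat = rng.normal(size=(4, 3))                  # raw logits
y_true = np.eye(3)[rng.integers(0, 3, size=4)]   # one-hot labels

loss_fn = LOSSES["CrossEntropyWithLogitsLoss"]
analytic = loss_fn.backward(y_hat, y_true)

eps = 1e-6
numeric = np.zeros_like(y_hat)
for i in range(y_hat.shape[0]):
    for j in range(y_hat.shape[1]):
        bumped = y_hat.copy()
        bumped[i, j] += eps
        numeric[i, j] = (loss_fn.forward(bumped, y_true) - loss_fn.forward(y_hat, y_true)) / eps

print(np.max(np.abs(analytic - numeric)))  # difference should be tiny (~1e-6 or less)
```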
nn/nn.py
CHANGED
@@ -1,55 +1,42 @@
-from nn.loss import LOSSES, Loss
+from dataclasses import dataclass, field
+import gradio as gr
 import numpy as np

+from nn.activation import Activation, Relu, SoftMax
+from nn.loss import Loss


 DTYPE = np.float32


+@dataclass
 class NN:
-        # try to get activation function and loss function
-        act_fn = ACTIVATIONS.get(activation_fn, None)
-        if act_fn is None:
-            raise KeyError(f"Invalid Activation function '{activation_fn}'")
-        loss_fn = LOSSES.get(loss_fn, None)
-        if loss_fn is None:
-            raise KeyError(f"Invalid Activation function '{activation_fn}'")
-        self._activation_fn: Activation = act_fn
-        self._loss_fn: Loss = loss_fn
-
-        self._loss_history = list()
-        self._weight_history = {
+    epochs: int
+    learning_rate: float
+    hidden_size: int
+    input_size: int
+    output_size: int
+    hidden_activation_fn: Activation
+    activation_fn: Activation
+    loss_fn: Loss
+    seed: int
+
+    _loss_history: list = field(default_factory=lambda: [], init=False)
+    _wo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _wh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _bo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _bh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _weight_history: dict[str, list[np.ndarray]] = field(
+        default_factory=lambda: {
             "wo": [],
             "wh": [],
             "bo": [],
             "bh": [],
-        }
+        },
+        init=False,
+    )

-        self._wh: Optional[np.ndarray] = None
-        self._bo: Optional[np.ndarray] = None
-        self._bh: Optional[np.ndarray] = None
+    def __post_init__(self) -> None:
         self._init_weights_and_biases()

     def _init_weights_and_biases(self) -> None:
@@ -79,28 +66,37 @@
         )
         return

+    # def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+    #     # Determine the activation function for the hidden layer
+    #     if self._activation_fn.__class__.__name__ == "SoftMax":
+    #         # Using ReLU for hidden layer when softmax is used in output layer
+    #         hidden_layer_activation = Sigmoid()
+    #     else:
+    #         # Use the specified activation function if not using softmax
+    #         hidden_layer_activation = self._activation_fn
+
+    #     # Compute the hidden layer output
+    #     hidden_layer_output = hidden_layer_activation.forward(
+    #         np.dot(X_train, self._wh) + self._bh
+    #     )
+
+    #     # Compute the output layer (prediction layer) using the specified activation function
+    #     y_hat = self._activation_fn.forward(
+    #         np.dot(hidden_layer_output, self._wo) + self._bo
+    #     )
+
+    #     return y_hat, hidden_layer_output
+
+    # TODO: make this forward function the main _forward function if
+    # the loss function that the user selected is a "logits" loss. Call
+    # the one above if it is not.
     def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
-        """
-        params:
-            X_train: np.ndarray -> data that we are training the NN on.
-
-        returns:
-            output layer np array containing the predicted outputs calculated using
-            the weights and biases of the current epoch.
-        """
-        assert self._activation_fn is not None
-
-        # hidden layer
-        hidden_layer_output = self._activation_fn.forward(
-            np.dot(X_train, self._wh) + self._bh
+        hidden_layer_output = self.hidden_activation_fn.forward(
+            np.dot(X_train, self._wh) + self._bh,
         )
-        return y_hat, hidden_layer_output
+        # Output layer does not apply softmax anymore, just return logits
+        logits = np.dot(hidden_layer_output, self._wo) + self._bo
+        return logits, hidden_layer_output

     def _backward(
         self,
@@ -109,22 +105,18 @@
         y_train: np.ndarray,
         hidden_output: np.ndarray,
     ) -> None:
-        assert self._activation_fn is not None
         assert self._wo is not None
-        assert self._loss_fn is not None

         # Calculate the error at the output
         # This should be the derivative of the loss function with respect to the output of the network
-        error_output = self._loss_fn.backward(
-            y_hat, y_train
-        ) * self._activation_fn.backward(y_hat)
+        error_output = self.loss_fn.backward(y_hat, y_train)

         # Calculate gradients for output layer weights and biases
         wo_prime = np.dot(hidden_output.T, error_output) * self.learning_rate
         bo_prime = np.sum(error_output, axis=0, keepdims=True) * self.learning_rate

         # Propagate the error back to the hidden layer
-        error_hidden = np.dot(error_output, self._wo.T) * self._activation_fn.backward(
+        error_hidden = np.dot(error_output, self._wo.T) * self.activation_fn.backward(
             hidden_output
         )

@@ -132,18 +124,25 @@
         wh_prime = np.dot(X_train.T, error_hidden) * self.learning_rate
         bh_prime = np.sum(error_hidden, axis=0, keepdims=True) * self.learning_rate

+        # Gradient clipping to prevent overflow
+        max_norm = 1.0  # You can adjust this threshold
+        wo_prime = np.clip(wo_prime, -max_norm, max_norm)
+        bo_prime = np.clip(bo_prime, -max_norm, max_norm)
+        wh_prime = np.clip(wh_prime, -max_norm, max_norm)
+        bh_prime = np.clip(bh_prime, -max_norm, max_norm)
+
         # Update weights and biases
         self._wo -= wo_prime
         self._wh -= wh_prime
         self._bo -= bo_prime
         self._bh -= bh_prime

+    # TODO: implement batch size in training, this will speed up the training loop
+    # quite a bit I believe
     def train(self, X_train: np.ndarray, y_train: np.ndarray) -> "NN":
-        assert self._loss_fn is not None
-
         for _ in gr.Progress().tqdm(range(self.epochs)):
             y_hat, hidden_output = self._forward(X_train=X_train)
+            loss = self.loss_fn.forward(y_hat=y_hat, y_true=y_train)
             self._loss_history.append(loss)
             self._backward(
                 X_train=X_train,
@@ -152,12 +151,15 @@
                 hidden_output=hidden_output,
             )

+            # TODO: make a 3d visualization traversing the loss plane. Might be too
+            # expensive to do though.
             # keep track of weights and biases at each epoch for visualization
-            self._weight_history["wo"].append(self._wo[0, 0])
-            self._weight_history["wh"].append(self._wh[0, 0])
-            self._weight_history["bo"].append(self._bo[0, 0])
-            self._weight_history["bh"].append(self._bh[0, 0])
+            # self._weight_history["wo"].append(self._wo[0, 0])
+            # self._weight_history["wh"].append(self._wh[0, 0])
+            # self._weight_history["bo"].append(self._bo[0, 0])
+            # self._weight_history["bh"].append(self._bh[0, 0])
         return self

     def predict(self, X_test: np.ndarray) -> np.ndarray:
+        pred, _ = self._forward(X_test)
+        return self.activation_fn.forward(pred)
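With `NN` rewritten as a dataclass, the caller wires in `Activation` and `Loss` instances directly (app.py does this through the `ACTIVATIONS` and `LOSSES` registries). A minimal construction sketch on a toy XOR problem, illustrative only; note that `train()` wraps its loop in `gr.Progress()`, so it is normally invoked from inside the Gradio app:

```python
# Illustrative direct construction of the dataclass-style NN (not part of the commit).
import numpy as np
from nn.nn import NN
from nn.activation import Relu, SoftMax
from nn.loss import CrossEntropyWithLogits

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y = np.array([[1, 0], [0, 1], [0, 1], [1, 0]], dtype=np.float32)  # one-hot XOR labels

net = NN(
    epochs=2_000,
    learning_rate=0.1,
    hidden_size=8,
    input_size=2,
    output_size=2,
    hidden_activation_fn=Relu(),
    activation_fn=SoftMax(),
    loss_fn=CrossEntropyWithLogits(),
    seed=0,
)
net.train(X_train=X, y_train=y)
print(net.predict(X_test=X).argmax(axis=1))  # ideally [0, 1, 1, 0] once trained
```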
nn/test.py
DELETED
@@ -1,30 +0,0 @@
-from nn.nn import NN
-import unittest
-
-TEST_NN = NN(
-    epochs=100,
-    learning_rate=0.001,
-    hidden_size=8,
-    input_size=2,
-    output_size=1,
-    activation_fn="Sigmoid",
-    loss_fn="MSE",
-)
-
-
-class TestNN(unittest.TestCase):
-    def test_init_w_b(self) -> None:
-        return
-
-    def test_forward(self) -> None:
-        return
-
-    def test_backward(self) -> None:
-        return
-
-    def test_train(self) -> None:
-        return
-
-
-if __name__ == "__main__":
-    unittest.main()
vis.py
CHANGED
@@ -1,20 +1,70 @@
+import matplotlib
 from sklearn import datasets
+import plotly.graph_objects as go
+import plotly.express as px
+import matplotlib.pyplot as plt
+import matplotlib
 import numpy as np
-        size="petal_length",
-        size_max=18,
-    )
-    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
+
+matplotlib.use("Agg")
+
+
+def show_digits():
+    digits = datasets.load_digits()
+    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
+    for ax, image, label in zip(axes, digits.images, digits.target):
+        ax.set_axis_off()
+        ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
+        ax.set_title("Training: %i" % label)
     return fig
+
+
+def loss_history_plt(loss_history: list[float], loss_fn_name: str):
+    return px.line(
+        x=[i for i in range(len(loss_history))],
+        y=loss_history,
+        title=f"{loss_fn_name} Loss vs. Training Epoch",
+        labels={
+            "x": "Epochs",
+            "y": f"{loss_fn_name} Loss",
+        },
+    )
+
+
+def hits_and_misses(y_pred: np.ndarray, y_true: np.ndarray):
+    # decode the one hot encoded predictions
+    y_pred_decoded = np.argmax(y_pred, axis=1)
+    y_true_decoded = np.argmax(y_true, axis=1)
+
+    hits = y_pred_decoded == y_true_decoded
+    color = np.where(hits, "Hit", "Miss")
+    hover_text = [
+        "True: " + str(y_true_decoded[i]) + ", Pred: " + str(y_pred_decoded[i])
+        for i in range(len(y_pred_decoded))
+    ]
+
+    return px.scatter(
+        x=np.arange(len(y_pred_decoded)),
+        y=y_true_decoded,
+        color=color,
+        title="Hits and Misses of Predictions",
+        labels={
+            "color": "Prediction Correctness",
+            "x": "Sample Index",
+            "y": "True Label",
+        },
+        color_discrete_map={"Hit": "blue", "Miss": "red"},
+        hover_name=hover_text,
+    )
+
+
+def make_confidence_label(y_pred: np.ndarray, y_test: np.ndarray):
+    # decode the one hot encoded predictions
+    y_pred_labels = np.argmax(y_pred, axis=1)
+    y_test_labels = np.argmax(y_test, axis=1)
+    confidence_dict: dict[str, float] = {}
+    for idx, class_name in enumerate([str(i) for i in range(10)]):
+        class_confidences_idxs = np.where(y_test_labels == idx)[0]
+        class_confidences = y_pred[class_confidences_idxs, idx]
+        confidence_dict[class_name] = float(np.mean(class_confidences))
+    return confidence_dict
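All three new helpers consume the same one-hot and probability arrays that `NN.predict` produces, so they can be exercised with synthetic data. A small sketch, illustrative only and assuming the `vis` module above is importable:

```python
# Drive the new visualization helpers with synthetic predictions (sketch only).
import numpy as np
from vis import hits_and_misses, loss_history_plt, make_confidence_label

rng = np.random.default_rng(0)
y_true = np.eye(10)[rng.integers(0, 10, size=200)]   # one-hot ground truth
logits = rng.normal(size=(200, 10)) + 3.0 * y_true   # mostly-correct scores
y_pred = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # softmax probabilities

scatter_fig = hits_and_misses(y_pred=y_pred, y_true=y_true)  # plotly scatter of hits vs. misses
loss_fig = loss_history_plt(
    loss_history=list(np.linspace(2.3, 0.3, 100)),
    loss_fn_name="CrossEntropy",
)
print(make_confidence_label(y_pred=y_pred, y_test=y_true))   # mean confidence per digit class
```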