Jensen-holm committed
Commit 03f2b37 · 1 parent: 9117cbd
Moving towards making this a Python package for simple neural network creation. This means making it an option to run this as a Gradio app or not; that logic is handled in __post_init__ and is not expensive. I have also implemented a forward_fn so we can handle non-logits loss functions.
- app.py +3 -3
- nn/nn.py +49 -49
- requirements.txt +5 -3
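To make the intent concrete, here is a rough usage sketch of the package-style (non-Gradio) path this commit moves toward. Only the names visible in the diffs below are real (NN, train, predict, input_size, output_size, seed, _gradio_app, loss_fn, the activation fields, epochs, batch_size); the concrete ReLU / SoftMax / CrossEntropyWithLogits classes and the toy data are illustrative assumptions, and the real dataclass has further fields (for example a hidden layer size) that this diff does not show.

    import numpy as np

    from nn.nn import NN
    from nn.activation import ReLU, SoftMax        # assumed concrete Activation classes
    from nn.loss import CrossEntropyWithLogits     # assumed concrete LogitsLoss class

    # toy stand-in for the 8x8 digits data used in app.py
    X_train = np.random.rand(100, 64).astype(np.float32)
    y_train = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, size=100)]

    nn_classifier = NN(
        epochs=1_000,
        batch_size=0.8,                    # fraction of the training set, asserted to be in (0, 1]
        input_size=64,                     # 8x8 pixel grid images
        output_size=10,                    # digits 0-9
        hidden_activation_fn=ReLU(),
        output_activation_fn=SoftMax(),
        loss_fn=CrossEntropyWithLogits(),  # a LogitsLoss, so NN picks the logits forward pass
        seed=42,
        _gradio_app=False,                 # default: plain tqdm progress bar instead of gr.Progress
    )

    nn_classifier.train(X_train=X_train, y_train=y_train)
    y_pred = nn_classifier.predict(X_train[:10])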
app.py CHANGED

@@ -61,6 +61,7 @@ def classification(
         input_size=64,  # 8x8 pixel grid images
         output_size=10,  # digits 0-9
         seed=seed,
+        _gradio_app=True,
     )

     nn_classifier.train(X_train=X_train, y_train=y_train)
@@ -81,6 +82,7 @@ def classification(


 if __name__ == "__main__":
+
     def _open_warning() -> str:
         with open("warning.md", "r") as f:
             return f.read()
@@ -101,9 +103,7 @@ if __name__ == "__main__":
     with gr.Row():
         with gr.Column():
             numeric_inputs = [
-                gr.Slider(
-                    minimum=100, maximum=10_000, step=50, label="Epochs"
-                ),
+                gr.Slider(minimum=100, maximum=10_000, step=50, label="Epochs"),
                 gr.Slider(
                     minimum=2, maximum=64, step=2, label="Hidden Network Size"
                 ),
nn/nn.py CHANGED

@@ -1,9 +1,11 @@
 from dataclasses import dataclass, field
+from typing import Callable, Optional
 import gradio as gr
 import numpy as np
+from tqdm import tqdm

 from nn.activation import Activation
-from nn.loss import Loss
+from nn.loss import Loss, LogitsLoss


 DTYPE = np.float32
@@ -19,29 +21,43 @@ class NN:
     output_size: int
     hidden_activation_fn: Activation
     output_activation_fn: Activation
-    loss_fn: Loss
+    loss_fn: Loss | LogitsLoss
     seed: int

+    _gradio_app: bool = False
+    _p_bar: Optional[tqdm | gr.Progress] = field(
+        default_factory=lambda: None, init=False
+    )
+    _forward_fn: Optional[Callable] = field(default_factory=lambda: None, init=False)
     _loss_history: list = field(default_factory=lambda: [], init=False)
     _wo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
     _wh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
     _bo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
     _bh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)

-    # not currently using this, see TODO: at bottom of this file
-    # _weight_history: dict[str, list[np.ndarray]] = field(
-    #     default_factory=lambda: {
-    #         "wo": [],
-    #         "wh": [],
-    #         "bo": [],
-    #         "bh": [],
-    #     },
-    #     init=False,
-    # )
-
     def __post_init__(self) -> None:
-        assert 0 < self.batch_size <= 1
         self._init_weights_and_biases()
+        self._forward_fn, self._p_bar = self._pre_train()
+
+        assert 0 < self.batch_size <= 1
+        assert self._forward_fn is not None
+        assert self._p_bar is not None
+
+    def _pre_train(self) -> tuple[Callable, tqdm | gr.Progress]:
+        def _get_forward_fn() -> Callable:
+            if isinstance(self.loss_fn, LogitsLoss):
+                return self._forward_logits
+            return self._forward
+
+        def _get_p_bar() -> tqdm | gr.Progress:
+            if self._gradio_app:
+                return gr.Progress().tqdm(range(self.epochs))
+            return tqdm(range(self.epochs), unit="epoch", ascii=" >=")
+
+        return (
+            _get_forward_fn(),
+            _get_p_bar(),
+        )

     @classmethod
     def from_dict(cls, args: dict) -> "NN":
@@ -73,35 +89,23 @@ class NN:
             dtype=DTYPE,
         )

-    # def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
-    #     # Determine the activation function for the hidden layer
-    #     if self._activation_fn.__class__.__name__ == "SoftMax":
-    #         # Using ReLU for hidden layer when softmax is used in output layer
-    #         hidden_layer_activation = Sigmoid()
-    #     else:
-    #         # Use the specified activation function if not using softmax
-    #         hidden_layer_activation = self._activation_fn
-
-    #     # Compute the hidden layer output
-    #     hidden_layer_output = hidden_layer_activation.forward(
-    #         np.dot(X_train, self._wh) + self._bh
-    #     )
-
-    #     # Compute the output layer (prediction layer) using the specified activation function
-    #     y_hat = self._activation_fn.forward(
-    #         np.dot(hidden_layer_output, self._wo) + self._bo
-    #     )
-
-    #     return y_hat, hidden_layer_output
-
-    # TODO: make this forward function the main _forward function if
-    # the loss function that the user selected is a "logits" loss. Call
-    # The one above if it is not.
     def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+        hidden_layer_output = self.hidden_activation_fn.forward(
+            np.dot(X_train, self._wh) + self._bh
+        )
+
+        # Compute the output layer (prediction layer) using the specified activation function
+        y_hat = self.output_activation_fn.forward(
+            np.dot(hidden_layer_output, self._wo) + self._bo
+        )
+
+        return y_hat, hidden_layer_output
+
+    def _forward_logits(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         hidden_layer_output = self.hidden_activation_fn.forward(
             np.dot(X_train, self._wh) + self._bh,
         )
-        #
+        # output layer does not apply softmax like other forward function, just return logits
         logits = np.dot(hidden_layer_output, self._wo) + self._bo
         return logits, hidden_layer_output

@@ -145,8 +149,10 @@ class NN:
         self._bh -= bh_prime

     def train(self, X_train: np.ndarray, y_train: np.ndarray) -> "NN":
-
+        assert self._p_bar is not None
+        assert self._forward_fn is not None

+        for _ in self._p_bar:
             n_samples = int(self.batch_size * X_train.shape[0])
             batch_indeces = np.random.choice(
                 X_train.shape[0], size=n_samples, replace=False
@@ -155,7 +161,7 @@ class NN:
             X_train_batch = X_train[batch_indeces]
             y_train_batch = y_train[batch_indeces]

-            y_hat, hidden_output = self._forward(X_train=X_train_batch)
+            y_hat, hidden_output = self._forward_fn(X_train=X_train_batch)
             loss = self.loss_fn.forward(y_hat=y_hat, y_true=y_train_batch)
             self._loss_history.append(loss)
             self._backward(
@@ -165,15 +171,9 @@ class NN:
                 hidden_output=hidden_output,
             )

-            # TODO: make a 3d visualization traversing loss plane. Might be too
-            # expenzive to do though.
-            # keep track of weights an biases at each epoch for visualization
-            # self._weight_history["wo"].append(self._wo[0, 0])
-            # self._weight_history["wh"].append(self._wh[0, 0])
-            # self._weight_history["bo"].append(self._bo[0, 0])
-            # self._weight_history["bh"].append(self._bh[0, 0])
         return self

     def predict(self, X_test: np.ndarray) -> np.ndarray:
-        pred, _ = self._forward(X_test)
+        assert self._forward_fn is not None
+        pred, _ = self._forward_fn(X_test)
         return self.output_activation_fn.forward(pred)
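The key structural change above is that __post_init__ now chooses the forward pass once, based on the loss type, so train() and predict() never branch per call. A stripped-down, self-contained sketch of that dispatch pattern (TinyNet and its methods are illustrative only, not code from this repo):

    from dataclasses import dataclass, field
    from typing import Callable

    class Loss: ...
    class LogitsLoss(Loss): ...   # losses that expect raw logits, mirroring nn.loss.LogitsLoss

    @dataclass
    class TinyNet:
        loss_fn: Loss
        _forward_fn: Callable = field(init=False)

        def __post_init__(self) -> None:
            # pick the forward function once, at construction time
            self._forward_fn = (
                self._forward_logits if isinstance(self.loss_fn, LogitsLoss) else self._forward
            )

        def _forward(self, x):
            return f"activation({x})"   # stand-in for output_activation_fn.forward(...)

        def _forward_logits(self, x):
            return f"logits({x})"       # stand-in for returning raw pre-activation outputs

    print(TinyNet(loss_fn=LogitsLoss())._forward_fn("x"))  # logits(x)
    print(TinyNet(loss_fn=Loss())._forward_fn("x"))        # activation(x)

The same one-time decision in _pre_train() also selects the progress bar, tqdm versus gr.Progress, depending on _gradio_app.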
requirements.txt CHANGED

@@ -1,4 +1,6 @@
-gradio==4.
+gradio==4.27.0
+matplotlib==3.8.4
 numpy==1.26.4
-plotly==5.
-scikit_learn==1.4.
+plotly==5.21.0
+scikit_learn==1.4.2
+tqdm==4.66.2