Jensen-holm committed on
Commit 6d0453c
1 Parent(s): 6307b4f

latest update, finishing classification with MNIST! More details on

Files changed (6)
  1. app.py +114 -37
  2. nn/activation.py +1 -4
  3. nn/loss.py +30 -5
  4. nn/nn.py +75 -73
  5. nn/test.py +0 -30
  6. vis.py +67 -17
app.py CHANGED
@@ -1,71 +1,117 @@
+ from nn.activation import SoftMax
  import plotly.express as px
  from sklearn import datasets
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.preprocessing import OneHotEncoder
  from sklearn.model_selection import train_test_split
  import numpy as np
  import gradio as gr
- from vis import iris_3d_scatter
+
  import nn  # custom neural network module
+ from vis import (  # classification visualization functions
+     show_digits,
+     hits_and_misses,
+     loss_history_plt,
+     make_confidence_label,
+ )


- def _preprocess_iris_data(
+ def _preprocess_digits(
      seed: int,
  ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-     iris = datasets.load_iris()
-     X = iris["data"]
-     y = iris["target"]
-     # normalize the features
-     X = StandardScaler().fit_transform(X)
-     # one hot encode the target variables
-     y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
-     return train_test_split(
-         X,
+     digits = datasets.load_digits()
+     n_samples = len(digits.images)
+     data = digits.images.reshape((n_samples, -1))
+     y = OneHotEncoder().fit_transform(digits.target.reshape(-1, 1)).toarray()
+     X_train, X_test, y_train, y_test = train_test_split(
+         data,
          y,
          test_size=0.2,
          random_state=seed,
      )
+     return X_train, X_test, y_train, y_test


- X_train, X_test, y_train, y_test = _preprocess_iris_data(seed=1)
+ X_train, X_test, y_train, y_test = _preprocess_digits(seed=1)


- def main(
+ def classification(
      Seed: int = 0,
+     Hidden_Layer_Activation: str = "Relu",
      Activation_Func: str = "SoftMax",
-     Loss_Func: str = "CrossEntropy",
+     Loss_Func: str = "CrossEntropyWithLogitsLoss",
      Epochs: int = 100,
      Hidden_Size: int = 8,
-     Learning_Rate: float = 0.01,
- ) -> gr.Plot:
+     Learning_Rate: float = 0.001,
+ ) -> tuple[gr.Plot, gr.Plot, gr.Label]:
+     assert Activation_Func in nn.ACTIVATIONS
+     assert Hidden_Layer_Activation in nn.ACTIVATIONS
+     assert Loss_Func in nn.LOSSES

-     iris_classifier = nn.NN(
+     classifier = nn.NN(
          epochs=Epochs,
          learning_rate=Learning_Rate,
-         activation_fn=Activation_Func,
-         loss_fn=Loss_Func,
+         hidden_activation_fn=nn.ACTIVATIONS[Hidden_Layer_Activation],
+         activation_fn=nn.ACTIVATIONS[Activation_Func],
+         loss_fn=nn.LOSSES[Loss_Func],
          hidden_size=Hidden_Size,
-         input_size=4,  # number of features in iris dataset
-         output_size=3,  # three classes in iris dataset
+         input_size=64,  # 8x8 image of pixels
+         output_size=10,  # digits 0-9
          seed=Seed,
      )
+     classifier.train(X_train=X_train, y_train=y_train)

-     iris_classifier.train(X_train=X_train, y_train=y_train)
-     loss_fig = px.line(
-         x=[i for i in range(len(iris_classifier._loss_history))],
-         y=iris_classifier._loss_history,
+     pred = classifier.predict(X_test=X_test)
+     hits_and_misses_fig = hits_and_misses(y_pred=pred, y_true=y_test)
+     loss_fig = loss_history_plt(
+         loss_history=classifier._loss_history,
+         loss_fn_name=classifier.loss_fn.__class__.__name__,
      )

-     return gr.Plot(loss_fig)
+     label_dict = make_confidence_label(y_pred=pred, y_test=y_test)
+     return (
+         gr.Plot(loss_fig, show_label=False),
+         gr.Plot(hits_and_misses_fig, show_label=False),
+         gr.Label(label_dict, label="Classification Confidence Rankings"),
+     )


  if __name__ == "__main__":
      with gr.Blocks() as interface:
          gr.Markdown("# Backpropagation Playground")
+         gr.Markdown(
+             """
+             ## What is this? <br>

+             The Backpropagation Playground is a GUI built around a neural network framework that I have built from scratch
+             in [numpy](https://numpy.org/). In this GUI, you can test different hyperparameters that will be fed to this framework and used
+             to train a neural network on the [MNIST](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) dataset of 8x8 pixel images.
+
+             ## ⚠️ PLEASE READ ⚠️
+             This application is impossibly slow on the HuggingFace CPU instance that it is running on. It is advised to clone the
+             repository and run it locally.
+
+             In order to get a decent classification score on the validation set of the MNIST data (hard coded to 20%), you will have to
+             do somewhere between 15,000 and 50,000 epochs with a learning rate around 0.001, and a hidden layer size
+             over 10 (roughly the example that I have provided). Running this many epochs with a hidden layer of that size
+             is pretty expensive on the 2 CPU cores that this Space has. So if you are actually curious, you might want to clone
+             this and run it locally because it will be much, much faster.
+
+             `git clone https://huggingface.co/spaces/Jensen-holm/Backprop-Playground`
+
+             After cloning, you will have to install the dependencies from requirements.txt into your environment (venv recommended).
+
+             `pip3 install -r requirements.txt`
+
+             Then, you can run the application on localhost with the following command.
+
+             `python3 app.py`
+
+             """
+         )

-         with gr.Tab("Classification"):
-
+         with gr.Tab("Classification"):
              with gr.Row():
-                 data_plt = iris_3d_scatter()
+                 data_plt = show_digits()
                  gr.Plot(data_plt)

              with gr.Row():
@@ -75,34 +121,65 @@ if __name__ == "__main__":
              with gr.Row():
                  with gr.Column():
                      numeric_inputs = [
-                         gr.Slider(minimum=100, maximum=10_000, step=50, label="Epochs"),
+                         gr.Slider(
+                             minimum=100, maximum=100_000, step=50, label="Epochs"
+                         ),
                          gr.Slider(
                              minimum=2, maximum=64, step=2, label="Hidden Network Size"
                          ),
                          gr.Number(minimum=0.00001, maximum=1.5, label="Learning Rate"),
                      ]
+
                  with gr.Column():
                      fn_inputs = [
                          gr.Dropdown(
-                             choices=["SoftMax"], label="Activation Function"
+                             choices=["Relu", "Sigmoid", "TanH"],
+                             label="Hidden Layer Activation",
+                         ),
+                         gr.Dropdown(choices=["SoftMax"], label="Output Activation"),
+                         gr.Dropdown(
+                             choices=["CrossEntropy", "CrossEntropyWithLogitsLoss"],
+                             label="Loss Function",
                          ),
-                         gr.Dropdown(choices=["CrossEntropy"], label="Loss Function"),
                      ]

+             inputs = seed_input + fn_inputs + numeric_inputs
              with gr.Row():
                  train_btn = gr.Button("Train", variant="primary")

+             with gr.Row():
+                 gr.Examples(
+                     examples=[
+                         [
+                             2,
+                             "Relu",
+                             "SoftMax",
+                             "CrossEntropyWithLogitsLoss",
+                             15_000,
+                             14,
+                             0.001,
+                         ]
+                     ],
+                     inputs=inputs,
+                 )
+
              # outputs in row below inputs
              with gr.Row():
-                 plt_outputs = [gr.Plot()]
+                 plt_outputs = [
+                     gr.Plot(label="Loss History / Epoch"),
+                     gr.Plot(label="Hits & Misses"),
+                 ]
+
+             with gr.Row():
+                 label_output = [gr.Label(label="Class Confidences")]

              train_btn.click(
-                 fn=main,
-                 inputs=seed_input + fn_inputs + numeric_inputs,
-                 outputs=plt_outputs,
+                 fn=classification,
+                 inputs=inputs,
+                 outputs=plt_outputs + label_output,
              )

          with gr.Tab("Regression"):
-             ...
+             gr.Markdown("### Coming Soon")

      interface.launch(show_error=True)
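
Editor's note: the click handler wiring above (input components in, `classification`'s return values out) follows the standard Gradio Blocks pattern. A minimal, self-contained sketch of that same pattern, hypothetical and independent of this repository, for readers unfamiliar with `Button.click`:

```python
# Hypothetical standalone demo of the Blocks + Button.click wiring used in app.py:
# the slider value is passed to the callback, and the returned Plot updates the output.
import gradio as gr
import plotly.express as px


def demo_fn(epochs: int) -> gr.Plot:
    # stand-in for `classification`: any function whose return values match `outputs`
    fig = px.line(x=list(range(int(epochs))), y=[i**0.5 for i in range(int(epochs))])
    return gr.Plot(fig)


with gr.Blocks() as demo:
    epochs_in = gr.Slider(minimum=10, maximum=1000, step=10, label="Epochs")
    out_plot = gr.Plot(label="Demo Output")
    btn = gr.Button("Run")
    btn.click(fn=demo_fn, inputs=[epochs_in], outputs=[out_plot])

if __name__ == "__main__":
    demo.launch()
```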
nn/activation.py CHANGED
@@ -2,9 +2,6 @@ import numpy as np
  from abc import abstractmethod, ABC


- __all__ = ["Activation", "Relu", "TanH", "Sigmoid", "SoftMax", "ACTIVATIONS"]
-
-
  class Activation(ABC):
      @abstractmethod
      def forward(self, X: np.ndarray) -> np.ndarray:
@@ -54,6 +51,6 @@ class SoftMax(Activation):
  ACTIVATIONS: dict[str, Activation] = {
      "Relu": Relu(),
      "Sigmoid": Sigmoid(),
-     "Tanh": TanH(),
+     "TanH": TanH(),
      "SoftMax": SoftMax(),
  }
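
Editor's note: the functional change here is only the dictionary key ("Tanh" to "TanH"), which makes the lookup match the "TanH" choice in the new Hidden Layer Activation dropdown in app.py. The concrete `TanH` class is not shown in this diff, so the following is a hedged sketch of what an `ACTIVATIONS` entry is expected to do: a `forward` plus a `backward` that is evaluated on already-activated values, which is how `NN._backward` calls it.

```python
# Sketch only: the real TanH lives in nn/activation.py and is not shown in this commit.
import numpy as np
from abc import ABC, abstractmethod


class Activation(ABC):
    @abstractmethod
    def forward(self, X: np.ndarray) -> np.ndarray: ...

    @abstractmethod
    def backward(self, X: np.ndarray) -> np.ndarray: ...


class TanH(Activation):
    def forward(self, X: np.ndarray) -> np.ndarray:
        return np.tanh(X)

    def backward(self, X: np.ndarray) -> np.ndarray:
        # derivative expressed in terms of the post-activation value, matching
        # how NN._backward passes hidden_output (already activated) to backward()
        return 1.0 - np.square(X)


x = np.linspace(-2.0, 2.0, 5)
a = TanH().forward(x)
print(TanH().backward(a))  # equals 1 - tanh(x)^2
```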
nn/loss.py CHANGED
@@ -3,9 +3,6 @@ from nn.activation import SoftMax
  import numpy as np


- __all__ = ["Loss", "MSE", "CrossEntropy", "LOSSES"]
-
-
  class Loss(ABC):
      @abstractmethod
      def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
@@ -16,6 +13,10 @@ class Loss(ABC):
          pass


+ class LogitsLoss(Loss):
+     pass
+
+
  class MSE(Loss):
      def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
          return np.sum(np.square(y_hat - y_true)) / y_true.shape[0]
@@ -30,21 +31,45 @@ class CrossEntropy(Loss):
          y_true = np.asarray(y_true)
          m = y_true.shape[0]
          p = self._softmax(y_hat)
-         log_likelihood = -np.log(p[range(m), y_true.argmax(axis=1)])
+         eps = 1e-15  # to prevent log(0)
+         log_likelihood = -np.log(
+             np.clip(p[range(m), y_true.argmax(axis=1)], a_min=eps, a_max=None)
+         )
          loss = np.sum(log_likelihood) / m
          return loss

      def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
          y_hat = np.asarray(y_hat)
          y_true = np.asarray(y_true)
-         return (y_hat - y_true) / y_true.shape[0]
+         grad = y_hat - y_true
+         return grad / y_true.shape[0]

      @staticmethod
      def _softmax(X: np.ndarray) -> np.ndarray:
          return SoftMax().forward(X)


+ class CrossEntropyWithLogits(LogitsLoss):
+     def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
+         # Apply the log-sum-exp trick for numerical stability
+         max_logits = np.max(y_hat, axis=1, keepdims=True)
+         log_sum_exp = np.log(np.sum(np.exp(y_hat - max_logits), axis=1, keepdims=True))
+         log_probs = y_hat - max_logits - log_sum_exp
+         # Select the log probability of the true class
+         loss = -np.sum(log_probs * y_true) / y_true.shape[0]
+         return loss
+
+     def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
+         # Compute softmax probabilities
+         exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
+         probs = exps / np.sum(exps, axis=1, keepdims=True)
+         # Subtract the one-hot encoded labels from the probabilities
+         grad = (probs - y_true) / y_true.shape[0]
+         return grad
+
+
  LOSSES: dict[str, Loss] = {
      "MSE": MSE(),
      "CrossEntropy": CrossEntropy(),
+     "CrossEntropyWithLogitsLoss": CrossEntropyWithLogits(),
  }
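
Editor's note: because `CrossEntropyWithLogits.forward` computes log-softmax with the log-sum-exp trick and `backward` returns `(softmax(logits) - y_true) / m`, the pair can be sanity-checked against a finite-difference gradient of `forward`. A small standalone check, assuming it is run from the repository root so `nn.loss` is importable:

```python
# Finite-difference check of CrossEntropyWithLogits.backward against its own forward.
import numpy as np
from nn.loss import CrossEntropyWithLogits

rng = np.random.default_rng(0)
loss_fn = CrossEntropyWithLogits()

logits = rng.normal(size=(5, 10))              # 5 samples, 10 classes of raw scores
y_true = np.eye(10)[rng.integers(0, 10, 5)]    # one-hot labels

analytic = loss_fn.backward(logits, y_true)

numeric = np.zeros_like(logits)
eps = 1e-6
for i in range(logits.shape[0]):
    for j in range(logits.shape[1]):
        bump = np.zeros_like(logits)
        bump[i, j] = eps
        # central difference of the scalar loss w.r.t. one logit
        numeric[i, j] = (
            loss_fn.forward(logits + bump, y_true) - loss_fn.forward(logits - bump, y_true)
        ) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # should be tiny, on the order of 1e-8 or less
```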
nn/nn.py CHANGED
@@ -1,55 +1,42 @@
- from typing import Optional
- from nn.activation import ACTIVATIONS, Activation
- from nn.loss import LOSSES, Loss
+ from dataclasses import dataclass, field
+ import gradio as gr
  import numpy as np

- import gradio as gr
+ from nn.activation import Activation, Relu, SoftMax
+ from nn.loss import Loss


  DTYPE = np.float32


+ @dataclass
  class NN:
-     def __init__(
-         self,
-         epochs: int,
-         learning_rate: float,
-         hidden_size: int,
-         input_size: int,
-         output_size: int,
-         activation_fn: str,
-         loss_fn: str,
-         seed: int,
-     ) -> None:
-         self.epochs = epochs
-         self.learning_rate = learning_rate
-         self.hidden_size = hidden_size
-         self.input_size = input_size
-         self.output_size = output_size
-         self.seed = seed
-
-         # try to get activation function and loss funciton
-         act_fn = ACTIVATIONS.get(activation_fn, None)
-         if act_fn is None:
-             raise KeyError(f"Invalid Activation function '{activation_fn}'")
-         loss_fn = LOSSES.get(loss_fn, None)
-         if loss_fn is None:
-             raise KeyError(f"Invalid Activation function '{activation_fn}'")
-         self._activation_fn: Activation = act_fn
-         self._loss_fn: Loss = loss_fn
-
-         self._loss_history = list()
-         self._weight_history = {
+     epochs: int
+     learning_rate: float
+     hidden_size: int
+     input_size: int
+     output_size: int
+     hidden_activation_fn: Activation
+     activation_fn: Activation
+     loss_fn: Loss
+     seed: int
+
+     _loss_history: list = field(default_factory=lambda: [], init=False)
+     _wo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+     _wh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+     _bo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+     _bh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+     _weight_history: dict[str, list[np.ndarray]] = field(
+         default_factory=lambda: {
              "wo": [],
              "wh": [],
              "bo": [],
              "bh": [],
-         }
+         },
+         init=False,
+     )

-         self._wo: Optional[np.ndarray] = None
-         self._wh: Optional[np.ndarray] = None
-         self._bo: Optional[np.ndarray] = None
-         self._bh: Optional[np.ndarray] = None
+     def __post_init__(self) -> None:
          self._init_weights_and_biases()

      def _init_weights_and_biases(self) -> None:
@@ -79,28 +66,37 @@ class NN:
          )
          return

+     # def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+     #     # Determine the activation function for the hidden layer
+     #     if self._activation_fn.__class__.__name__ == "SoftMax":
+     #         # Using ReLU for hidden layer when softmax is used in output layer
+     #         hidden_layer_activation = Sigmoid()
+     #     else:
+     #         # Use the specified activation function if not using softmax
+     #         hidden_layer_activation = self._activation_fn
+
+     #     # Compute the hidden layer output
+     #     hidden_layer_output = hidden_layer_activation.forward(
+     #         np.dot(X_train, self._wh) + self._bh
+     #     )
+
+     #     # Compute the output layer (prediction layer) using the specified activation function
+     #     y_hat = self._activation_fn.forward(
+     #         np.dot(hidden_layer_output, self._wo) + self._bo
+     #     )
+
+     #     return y_hat, hidden_layer_output
+
+     # TODO: make this forward function the main _forward function if
+     # the loss function that the user selected is a "logits" loss. Call
+     # the one above if it is not.
      def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
-         """
-         _forward(X_train): ran as the first step of each epoch during training.
-
-         params:
-             X_train: np.ndarray -> data that we are training the NN on.
-
-         returns:
-             output layer np array containing the predicted outputs calculated using
-             the weights and biases of the current epoch.
-         """
-         assert self._activation_fn is not None
-
-         # hidden layer
-         hidden_layer_output = self._activation_fn.forward(
-             np.dot(X_train, self._wh) + self._bh
+         hidden_layer_output = self.hidden_activation_fn.forward(
+             np.dot(X_train, self._wh) + self._bh,
          )
-         # output layer (prediction layer)
-         y_hat = self._activation_fn.forward(
-             np.dot(hidden_layer_output, self._wo) + self._bo
-         )
-         return y_hat, hidden_layer_output
+         # Output layer does not apply softmax anymore, just return logits
+         logits = np.dot(hidden_layer_output, self._wo) + self._bo
+         return logits, hidden_layer_output

      def _backward(
          self,
@@ -109,22 +105,18 @@ class NN:
          y_train: np.ndarray,
          hidden_output: np.ndarray,
      ) -> None:
-         assert self._activation_fn is not None
          assert self._wo is not None
-         assert self._loss_fn is not None

          # Calculate the error at the output
          # This should be the derivative of the loss function with respect to the output of the network
-         error_output = self._loss_fn.backward(
-             y_hat, y_train
-         ) * self._activation_fn.backward(y_hat)
+         error_output = self.loss_fn.backward(y_hat, y_train)

          # Calculate gradients for output layer weights and biases
          wo_prime = np.dot(hidden_output.T, error_output) * self.learning_rate
          bo_prime = np.sum(error_output, axis=0, keepdims=True) * self.learning_rate

          # Propagate the error back to the hidden layer
-         error_hidden = np.dot(error_output, self._wo.T) * self._activation_fn.backward(
+         error_hidden = np.dot(error_output, self._wo.T) * self.activation_fn.backward(
              hidden_output
          )

@@ -132,18 +124,25 @@ class NN:
          wh_prime = np.dot(X_train.T, error_hidden) * self.learning_rate
          bh_prime = np.sum(error_hidden, axis=0, keepdims=True) * self.learning_rate

+         # Gradient clipping to prevent overflow
+         max_norm = 1.0  # You can adjust this threshold
+         wo_prime = np.clip(wo_prime, -max_norm, max_norm)
+         bo_prime = np.clip(bo_prime, -max_norm, max_norm)
+         wh_prime = np.clip(wh_prime, -max_norm, max_norm)
+         bh_prime = np.clip(bh_prime, -max_norm, max_norm)
+
          # Update weights and biases
          self._wo -= wo_prime
          self._wh -= wh_prime
          self._bo -= bo_prime
          self._bh -= bh_prime

+     # TODO: implement batch size in training, this will speed up the training loop
+     # quite a bit I believe
      def train(self, X_train: np.ndarray, y_train: np.ndarray) -> "NN":
-         assert self._loss_fn is not None
-
          for _ in gr.Progress().tqdm(range(self.epochs)):
              y_hat, hidden_output = self._forward(X_train=X_train)
-             loss = self._loss_fn.forward(y_hat=y_hat, y_true=y_train)
+             loss = self.loss_fn.forward(y_hat=y_hat, y_true=y_train)
              self._loss_history.append(loss)
              self._backward(
                  X_train=X_train,
@@ -152,12 +151,15 @@ class NN:
                  hidden_output=hidden_output,
              )

+             # TODO: make a 3d visualization traversing the loss plane. Might be too
+             # expensive to do though.
              # keep track of weights and biases at each epoch for visualization
-             self._weight_history["wo"].append(self._wo[0, 0])
-             self._weight_history["wh"].append(self._wh[0, 0])
-             self._weight_history["bo"].append(self._bo[0, 0])
-             self._weight_history["bh"].append(self._bh[0, 0])
+             # self._weight_history["wo"].append(self._wo[0, 0])
+             # self._weight_history["wh"].append(self._wh[0, 0])
+             # self._weight_history["bo"].append(self._bo[0, 0])
+             # self._weight_history["bh"].append(self._bh[0, 0])
          return self

      def predict(self, X_test: np.ndarray) -> np.ndarray:
-         return self._forward(X_train=X_test)[0]
+         pred, _ = self._forward(X_test)
+         return self.activation_fn.forward(pred)
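
Editor's note on the new clipping block: `np.clip(grad, -max_norm, max_norm)` bounds each gradient entry independently, so despite the variable name this is element-wise value clipping rather than norm clipping. If clipping by global norm were the intent, a sketch of that variant (hypothetical, not what this commit does) could look like:

```python
import numpy as np


def clip_by_global_norm(grads: list[np.ndarray], max_norm: float = 1.0) -> list[np.ndarray]:
    # Scale every gradient by the same factor so the combined L2 norm is at most max_norm.
    total_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
    scale = min(1.0, max_norm / (total_norm + 1e-12))
    return [g * scale for g in grads]


# e.g. wo_prime, bo_prime, wh_prime, bh_prime = clip_by_global_norm(
#     [wo_prime, bo_prime, wh_prime, bh_prime]
# )
```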
nn/test.py DELETED
@@ -1,30 +0,0 @@
1
- from nn.nn import NN
2
- import unittest
3
-
4
- TEST_NN = NN(
5
- epochs=100,
6
- learning_rate=0.001,
7
- hidden_size=8,
8
- input_size=2,
9
- output_size=1,
10
- activation_fn="Sigmoid",
11
- loss_fn="MSE",
12
- )
13
-
14
-
15
- class TestNN(unittest.TestCase):
16
- def test_init_w_b(self) -> None:
17
- return
18
-
19
- def test_forward(self) -> None:
20
- return
21
-
22
- def test_backward(self) -> None:
23
- return
24
-
25
- def test_train(self) -> None:
26
- return
27
-
28
-
29
- if __name__ == "__main__":
30
- unittest.main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vis.py CHANGED
@@ -1,20 +1,70 @@
- import plotly.express as px
+ import matplotlib
  from sklearn import datasets
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ import plotly.graph_objects as go
+ import plotly.express as px
+ import matplotlib.pyplot as plt
+ import matplotlib
  import numpy as np
- import os
-
-
- def iris_3d_scatter():
-     df = px.data.iris()
-     fig = px.scatter_3d(
-         df,
-         x="sepal_length",
-         y="sepal_width",
-         z="petal_width",
-         color="species",
-         size="petal_length",
-         size_max=18,
-     )
-     fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
+
+ matplotlib.use("Agg")
+
+
+ def show_digits():
+     digits = datasets.load_digits()
+     fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
+     for ax, image, label in zip(axes, digits.images, digits.target):
+         ax.set_axis_off()
+         ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
+         ax.set_title("Training: %i" % label)
      return fig
+
+
+ def loss_history_plt(loss_history: list[float], loss_fn_name: str):
+     return px.line(
+         x=[i for i in range(len(loss_history))],
+         y=loss_history,
+         title=f"{loss_fn_name} Loss vs. Training Epoch",
+         labels={
+             "x": "Epochs",
+             "y": f"{loss_fn_name} Loss",
+         },
+     )
+
+
+ def hits_and_misses(y_pred: np.ndarray, y_true: np.ndarray):
+     # decode the one hot encoded predictions
+     y_pred_decoded = np.argmax(y_pred, axis=1)
+     y_true_decoded = np.argmax(y_true, axis=1)
+
+     hits = y_pred_decoded == y_true_decoded
+     color = np.where(hits, "Hit", "Miss")
+     hover_text = [
+         "True: " + str(y_true_decoded[i]) + ", Pred: " + str(y_pred_decoded[i])
+         for i in range(len(y_pred_decoded))
+     ]
+
+     return px.scatter(
+         x=np.arange(len(y_pred_decoded)),
+         y=y_true_decoded,
+         color=color,
+         title="Hits and Misses of Predictions",
+         labels={
+             "color": "Prediction Correctness",
+             "x": "Sample Index",
+             "y": "True Label",
+         },
+         color_discrete_map={"Hit": "blue", "Miss": "red"},
+         hover_name=hover_text,
+     )
+
+
+ def make_confidence_label(y_pred: np.ndarray, y_test: np.ndarray):
+     # decode the one hot encoded predictions
+     y_pred_labels = np.argmax(y_pred, axis=1)
+     y_test_labels = np.argmax(y_test, axis=1)
+     confidence_dict: dict[str, float] = {}
+     for idx, class_name in enumerate([str(i) for i in range(10)]):
+         class_confidences_idxs = np.where(y_test_labels == idx)[0]
+         class_confidences = y_pred[class_confidences_idxs, idx]
+         confidence_dict[class_name] = float(np.mean(class_confidences))
+     return confidence_dict
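
Editor's note: all three helpers expect one-hot labels and an `(n_samples, 10)` array of class scores, which is what `classifier.predict` produces in app.py. A standalone usage sketch with synthetic data (assumes it is run from the repository root so `vis` is importable):

```python
import numpy as np
from vis import hits_and_misses, loss_history_plt, make_confidence_label

rng = np.random.default_rng(0)

y_true = np.eye(10)[rng.integers(0, 10, size=100)]   # one-hot labels for 100 samples
y_pred = rng.random((100, 10))
y_pred /= y_pred.sum(axis=1, keepdims=True)          # fake softmax-like probabilities

hits_and_misses(y_pred=y_pred, y_true=y_true).show()          # plotly scatter of hits vs. misses
print(make_confidence_label(y_pred=y_pred, y_test=y_true))    # mean confidence per digit class
loss_history_plt(
    loss_history=[1.0 / (i + 1) for i in range(50)], loss_fn_name="MSE"
).show()
```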