from dataclasses import dataclass, field

import gradio as gr
import numpy as np

from nn.activation import Activation
from nn.loss import Loss

DTYPE = np.float32

@dataclass
class NN:
    epochs: int
    learning_rate: float
    hidden_size: int
    input_size: int
    batch_size: float
    output_size: int
    hidden_activation_fn: Activation
    output_activation_fn: Activation
    loss_fn: Loss
    seed: int
    _loss_history: list = field(default_factory=list, init=False)
    _wo: np.ndarray = field(default_factory=lambda: np.array([]), init=False)
    _wh: np.ndarray = field(default_factory=lambda: np.array([]), init=False)
    _bo: np.ndarray = field(default_factory=lambda: np.array([]), init=False)
    _bh: np.ndarray = field(default_factory=lambda: np.array([]), init=False)
    # not currently using this, see TODO: at bottom of this file
    # _weight_history: dict[str, list[np.ndarray]] = field(
    #     default_factory=lambda: {
    #         "wo": [],
    #         "wh": [],
    #         "bo": [],
    #         "bh": [],
    #     },
    #     init=False,
    # )

    def __post_init__(self) -> None:
        # batch_size is a fraction of the training set, not an absolute count
        assert 0 < self.batch_size <= 1
        self._init_weights_and_biases()

    @classmethod
    def from_dict(cls, args: dict) -> "NN":
        return cls(**args)

    def _init_weights_and_biases(self) -> None:
        """
        NN._init_weights_and_biases(): Should only be run once, right before the
        training loop, to randomly initialize the weights and biases.

        params:
            NN object with hidden layer size, output size, and input size
            defined.
        returns:
            None; modifies the _bh, _bo, _wo, _wh attributes in place.
        """
        np.random.seed(self.seed)
        self._bh = np.zeros((1, self.hidden_size), dtype=DTYPE)
        self._bo = np.zeros((1, self.output_size), dtype=DTYPE)
        # He-style initialization: scale random weights by sqrt(2 / fan_in)
        self._wh = np.asarray(
            np.random.randn(self.input_size, self.hidden_size)
            * np.sqrt(2 / self.input_size),
            dtype=DTYPE,
        )
        self._wo = np.asarray(
            np.random.randn(self.hidden_size, self.output_size)
            * np.sqrt(2 / self.hidden_size),
            dtype=DTYPE,
        )

    # def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    #     # Determine the activation function for the hidden layer
    #     if self._activation_fn.__class__.__name__ == "SoftMax":
    #         # Using Sigmoid for the hidden layer when softmax is used in the output layer
    #         hidden_layer_activation = Sigmoid()
    #     else:
    #         # Use the specified activation function if not using softmax
    #         hidden_layer_activation = self._activation_fn
    #     # Compute the hidden layer output
    #     hidden_layer_output = hidden_layer_activation.forward(
    #         np.dot(X_train, self._wh) + self._bh
    #     )
    #     # Compute the output layer (prediction layer) using the specified activation function
    #     y_hat = self._activation_fn.forward(
    #         np.dot(hidden_layer_output, self._wo) + self._bo
    #     )
    #     return y_hat, hidden_layer_output

    # TODO: make this forward function the main _forward function if
    # the loss function that the user selected is a "logits" loss. Call
    # the one above if it is not. (A rough sketch of that dispatch follows
    # below.)
    def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        hidden_layer_output = self.hidden_activation_fn.forward(
            np.dot(X_train, self._wh) + self._bh,
        )
        # The output layer no longer applies softmax; just return the logits
        logits = np.dot(hidden_layer_output, self._wo) + self._bo
        return logits, hidden_layer_output
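
    # A possible shape for the TODO above: a commented-out sketch, not part of
    # the current API, that dispatches on a hypothetical `expects_logits`
    # attribute of the loss, so "logits" losses skip the output activation
    # while other losses get an activated output.
    #
    # def _forward_dispatch(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    #     hidden = self.hidden_activation_fn.forward(np.dot(X_train, self._wh) + self._bh)
    #     pre_activation = np.dot(hidden, self._wo) + self._bo
    #     if getattr(self.loss_fn, "expects_logits", True):
    #         # loss works directly on raw logits (e.g. softmax cross-entropy)
    #         return pre_activation, hidden
    #     # otherwise apply the configured output activation before the loss
    #     return self.output_activation_fn.forward(pre_activation), hidden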

    def _backward(
        self,
        X_train: np.ndarray,
        y_hat: np.ndarray,
        y_train: np.ndarray,
        hidden_output: np.ndarray,
    ) -> None:
        assert self._wo is not None
        # Calculate the error at the output
        # This should be the derivative of the loss function with respect to the output of the network
        error_output = self.loss_fn.backward(y_hat, y_train)
        # Calculate gradients for output layer weights and biases
        wo_prime = np.dot(hidden_output.T, error_output) * self.learning_rate
        bo_prime = np.sum(error_output, axis=0, keepdims=True) * self.learning_rate
        # Propagate the error back to the hidden layer
        # using the hidden layer activation's derivative
        error_hidden = np.dot(
            error_output, self._wo.T
        ) * self.hidden_activation_fn.backward(hidden_output)
        # Calculate gradients for hidden layer weights and biases
        wh_prime = np.dot(X_train.T, error_hidden) * self.learning_rate
        bh_prime = np.sum(error_hidden, axis=0, keepdims=True) * self.learning_rate
        # Gradient clipping to prevent overflow
        max_norm = 1.0  # this is an adjustable threshold
        wo_prime = np.clip(wo_prime, -max_norm, max_norm)
        bo_prime = np.clip(bo_prime, -max_norm, max_norm)
        wh_prime = np.clip(wh_prime, -max_norm, max_norm)
        bh_prime = np.clip(bh_prime, -max_norm, max_norm)
        # Update weights and biases
        self._wo -= wo_prime
        self._wh -= wh_prime
        self._bo -= bo_prime
        self._bh -= bh_prime

    def train(self, X_train: np.ndarray, y_train: np.ndarray) -> "NN":
        for _ in gr.Progress().tqdm(range(self.epochs)):
            n_samples = int(self.batch_size * X_train.shape[0])
            batch_indices = np.random.choice(
                X_train.shape[0], size=n_samples, replace=False
            )
            X_train_batch = X_train[batch_indices]
            y_train_batch = y_train[batch_indices]
            y_hat, hidden_output = self._forward(X_train=X_train_batch)
            loss = self.loss_fn.forward(y_hat=y_hat, y_true=y_train_batch)
            self._loss_history.append(loss)
            self._backward(
                X_train=X_train_batch,
                y_hat=y_hat,
                y_train=y_train_batch,
                hidden_output=hidden_output,
            )
            # TODO: make a 3d visualization traversing the loss plane. Might be
            # too expensive to do, though.
            # keep track of weights and biases at each epoch for visualization
            # self._weight_history["wo"].append(self._wo[0, 0])
            # self._weight_history["wh"].append(self._wh[0, 0])
            # self._weight_history["bo"].append(self._bo[0, 0])
            # self._weight_history["bh"].append(self._bh[0, 0])
        return self

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        pred, _ = self._forward(X_test)
        return self.output_activation_fn.forward(pred)
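
# A minimal usage sketch, kept commented out because train() drives a
# gr.Progress() bar and is meant to run inside a Gradio event handler.
# Assumptions: `nn.activation` exposes ReLU/SoftMax-style classes and `nn.loss`
# a cross-entropy loss; those names are illustrative, not the confirmed API.
#
# from nn.activation import ReLU, SoftMax
# from nn.loss import CrossEntropy
#
# model = NN.from_dict(
#     {
#         "epochs": 100,
#         "learning_rate": 0.01,
#         "hidden_size": 16,
#         "input_size": 4,
#         "batch_size": 0.5,  # fraction of the training set used per epoch
#         "output_size": 3,
#         "hidden_activation_fn": ReLU(),
#         "output_activation_fn": SoftMax(),
#         "loss_fn": CrossEntropy(),
#         "seed": 42,
#     }
# )
# X = np.random.rand(150, 4).astype(DTYPE)
# y = np.eye(3, dtype=DTYPE)[np.random.randint(0, 3, size=150)]  # one-hot labels
# probs = model.train(X, y).predict(X)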