Jensen-holm committed
Commit • 6d0453c
1 Parent(s): 6307b4f
latest update, finishing classification with MNIST! More details on
Browse files:
- app.py +114 -37
- nn/activation.py +1 -4
- nn/loss.py +30 -5
- nn/nn.py +75 -73
- nn/test.py +0 -30
- vis.py +67 -17
app.py
CHANGED
@@ -1,71 +1,117 @@
+from nn.activation import SoftMax
 import plotly.express as px
 from sklearn import datasets
+from sklearn.preprocessing import OneHotEncoder
 from sklearn.model_selection import train_test_split
 import numpy as np
 import gradio as gr
+
 import nn  # custom neural network module
+from vis import (  # classification visualization functions
+    show_digits,
+    hits_and_misses,
+    loss_history_plt,
+    make_confidence_label,
+)


+def _preprocess_digits(
     seed: int,
 ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
-    return train_test_split(
-        X,
+    digits = datasets.load_digits()
+    n_samples = len(digits.images)
+    data = digits.images.reshape((n_samples, -1))
+    y = OneHotEncoder().fit_transform(digits.target.reshape(-1, 1)).toarray()
+    X_train, X_test, y_train, y_test = train_test_split(
+        data,
         y,
         test_size=0.2,
         random_state=seed,
     )
+    return X_train, X_test, y_train, y_test


+X_train, X_test, y_train, y_test = _preprocess_digits(seed=1)


+def classification(
     Seed: int = 0,
+    Hidden_Layer_Activation: str = "Relu",
     Activation_Func: str = "SoftMax",
+    Loss_Func: str = "CrossEntropyWithLogitsLoss",
     Epochs: int = 100,
     Hidden_Size: int = 8,
+    Learning_Rate: float = 0.001,
-) -> gr.Plot:
+) -> tuple[gr.Plot, gr.Plot, gr.Label]:
+    assert Activation_Func in nn.ACTIVATIONS
+    assert Hidden_Layer_Activation in nn.ACTIVATIONS
+    assert Loss_Func in nn.LOSSES

+    classifier = nn.NN(
         epochs=Epochs,
         learning_rate=Learning_Rate,
+        hidden_activation_fn=nn.ACTIVATIONS[Hidden_Layer_Activation],
+        activation_fn=nn.ACTIVATIONS[Activation_Func],
+        loss_fn=nn.LOSSES[Loss_Func],
         hidden_size=Hidden_Size,
+        input_size=64,  # 8x8 image of pixels
+        output_size=10,  # digits 0-9
         seed=Seed,
     )
+    classifier.train(X_train=X_train, y_train=y_train)

+    pred = classifier.predict(X_test=X_test)
+    hits_and_misses_fig = hits_and_misses(y_pred=pred, y_true=y_test)
+    loss_fig = loss_history_plt(
+        loss_history=classifier._loss_history,
+        loss_fn_name=classifier.loss_fn.__class__.__name__,
     )

+    label_dict = make_confidence_label(y_pred=pred, y_test=y_test)
+    return (
+        gr.Plot(loss_fig, show_label=False),
+        gr.Plot(hits_and_misses_fig, show_label=False),
+        gr.Label(label_dict, label="Classification Confidence Rankings"),
+    )


 if __name__ == "__main__":
     with gr.Blocks() as interface:
         gr.Markdown("# Backpropagation Playground")
+        gr.Markdown(
+            """
+            ## What is this? <br>
+
+            The Backpropagation Playground is a GUI built around a neural network framework that I have built from scratch
+            in [numpy](https://numpy.org/). In this GUI, you can test different hyperparameters that will be fed to this framework and used
+            to train a neural network on the [MNIST](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) dataset of 8x8 pixel images.
+
+            ## ⚠️ PLEASE READ ⚠️
+            This application is impossibly slow on the HuggingFace CPU instance that it is running on. It is advised to clone the
+            repository and run it locally.
+
+            In order to get a decent classification score on the validation set of the MNIST data (hard coded to 20%), you will have to
+            run somewhere between 15,000 and 50,000 epochs with a learning rate around 0.001 and a hidden layer size
+            over 10 (roughly the example that I have provided). Running that many epochs with a hidden layer of that size
+            is pretty expensive on the 2 CPU cores that this space has. So if you are actually curious, you might want to clone
+            this and run it locally because it will be much, much faster.
+
+            `git clone https://huggingface.co/spaces/Jensen-holm/Backprop-Playground`
+
+            After cloning, you will have to install the dependencies from requirements.txt into your environment (venv recommended).
+
+            `pip3 install -r requirements.txt`
+
+            Then, you can run the application on localhost with the following command.
+
+            `python3 app.py`
+
+            """
+        )

+        with gr.Tab("Classification"):
             with gr.Row():
+                data_plt = show_digits()
                 gr.Plot(data_plt)

             with gr.Row():
@@ -75,34 +121,65 @@ if __name__ == "__main__":
             with gr.Row():
                 with gr.Column():
                     numeric_inputs = [
+                        gr.Slider(
+                            minimum=100, maximum=100_000, step=50, label="Epochs"
+                        ),
                         gr.Slider(
                             minimum=2, maximum=64, step=2, label="Hidden Network Size"
                         ),
                         gr.Number(minimum=0.00001, maximum=1.5, label="Learning Rate"),
                     ]
+
                 with gr.Column():
                     fn_inputs = [
                         gr.Dropdown(
+                            choices=["Relu", "Sigmoid", "TanH"],
+                            label="Hidden Layer Activation",
+                        ),
+                        gr.Dropdown(choices=["SoftMax"], label="Output Activation"),
+                        gr.Dropdown(
+                            choices=["CrossEntropy", "CrossEntropyWithLogitsLoss"],
+                            label="Loss Function",
                         ),
-                        gr.Dropdown(choices=["CrossEntropy"], label="Loss Function"),
                     ]

+            inputs = seed_input + fn_inputs + numeric_inputs
             with gr.Row():
                 train_btn = gr.Button("Train", variant="primary")

+            with gr.Row():
+                gr.Examples(
+                    examples=[
+                        [
+                            2,
+                            "Relu",
+                            "SoftMax",
+                            "CrossEntropyWithLogitsLoss",
+                            15_000,
+                            14,
+                            0.001,
+                        ]
+                    ],
+                    inputs=inputs,
+                )
+
             # outputs in row below inputs
             with gr.Row():
+                plt_outputs = [
+                    gr.Plot(label="Loss History / Epoch"),
+                    gr.Plot(label="Hits & Misses"),
+                ]
+
+            with gr.Row():
+                label_output = [gr.Label(label="Class Confidences")]

             train_btn.click(
-                outputs=plt_outputs,
+                fn=classification,
+                inputs=inputs,
+                outputs=plt_outputs + label_output,
             )

         with gr.Tab("Regression"):
+            gr.Markdown("### Coming Soon")

         interface.launch(show_error=True)
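The note above recommends roughly 15,000 to 50,000 epochs, a learning rate near 0.001, and a hidden layer size over 10. A minimal headless sketch of that configuration, using the `nn.ACTIVATIONS` / `nn.LOSSES` registries the same way app.py does; this is an illustration rather than part of the commit, and `train()` wraps its loop in `gr.Progress()`, which is normally driven from inside the Gradio app:

```python
# Hypothetical headless run of the recommended hyperparameters (sketch only).
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

import nn  # the custom neural network package from this repo

digits = datasets.load_digits()
X = digits.images.reshape((len(digits.images), -1))
y = OneHotEncoder().fit_transform(digits.target.reshape(-1, 1)).toarray()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

classifier = nn.NN(
    epochs=15_000,
    learning_rate=0.001,
    hidden_size=14,
    input_size=64,   # 8x8 pixel images, flattened
    output_size=10,  # digits 0-9
    hidden_activation_fn=nn.ACTIVATIONS["Relu"],
    activation_fn=nn.ACTIVATIONS["SoftMax"],
    loss_fn=nn.LOSSES["CrossEntropyWithLogitsLoss"],
    seed=2,
)
classifier.train(X_train=X_train, y_train=y_train)

pred = classifier.predict(X_test=X_test)
accuracy = np.mean(pred.argmax(axis=1) == y_test.argmax(axis=1))
print(f"validation accuracy: {accuracy:.3f}")
```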
nn/activation.py
CHANGED
@@ -2,9 +2,6 @@ import numpy as np
 from abc import abstractmethod, ABC


-__all__ = ["Activation", "Relu", "TanH", "Sigmoid", "SoftMax", "ACTIVATIONS"]
-
-
 class Activation(ABC):
     @abstractmethod
     def forward(self, X: np.ndarray) -> np.ndarray:
@@ -54,6 +51,6 @@ class SoftMax(Activation):
 ACTIVATIONS: dict[str, Activation] = {
     "Relu": Relu(),
     "Sigmoid": Sigmoid(),
+    "TanH": TanH(),
    "SoftMax": SoftMax(),
 }
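With `"TanH"` registered, every activation offered by the app's dropdowns can be resolved from the same `ACTIVATIONS` dictionary by name. A small lookup sketch, illustrative only and assuming the `nn.activation` module above is importable:

```python
# Look up activations by the same string keys the Gradio dropdowns use.
import numpy as np
from nn.activation import ACTIVATIONS

x = np.array([[-2.0, 0.0, 2.0]])
for name, act in ACTIVATIONS.items():
    # each Activation implements forward() (and backward() for its derivative)
    print(name, act.forward(x))
```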
nn/loss.py
CHANGED
@@ -3,9 +3,6 @@ from nn.activation import SoftMax
 import numpy as np


-__all__ = ["Loss", "MSE", "CrossEntropy", "LOSSES"]
-
-
 class Loss(ABC):
     @abstractmethod
     def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
@@ -16,6 +13,10 @@
         pass


+class LogitsLoss(Loss):
+    pass
+
+
 class MSE(Loss):
     def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
         return np.sum(np.square(y_hat - y_true)) / y_true.shape[0]
@@ -30,21 +31,45 @@ class CrossEntropy(Loss):
         y_true = np.asarray(y_true)
         m = y_true.shape[0]
         p = self._softmax(y_hat)
+        eps = 1e-15  # to prevent log(0)
+        log_likelihood = -np.log(
+            np.clip(p[range(m), y_true.argmax(axis=1)], a_min=eps, a_max=None)
+        )
         loss = np.sum(log_likelihood) / m
         return loss

     def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
         y_hat = np.asarray(y_hat)
         y_true = np.asarray(y_true)
+        grad = y_hat - y_true
+        return grad / y_true.shape[0]

     @staticmethod
     def _softmax(X: np.ndarray) -> np.ndarray:
         return SoftMax().forward(X)


+class CrossEntropyWithLogits(LogitsLoss):
+    def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
+        # Apply the log-sum-exp trick for numerical stability
+        max_logits = np.max(y_hat, axis=1, keepdims=True)
+        log_sum_exp = np.log(np.sum(np.exp(y_hat - max_logits), axis=1, keepdims=True))
+        log_probs = y_hat - max_logits - log_sum_exp
+        # Select the log probability of the true class
+        loss = -np.sum(log_probs * y_true) / y_true.shape[0]
+        return loss
+
+    def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
+        # Compute softmax probabilities
+        exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
+        probs = exps / np.sum(exps, axis=1, keepdims=True)
+        # Subtract the one-hot encoded labels from the probabilities
+        grad = (probs - y_true) / y_true.shape[0]
+        return grad
+
+
 LOSSES: dict[str, Loss] = {
     "MSE": MSE(),
     "CrossEntropy": CrossEntropy(),
+    "CrossEntropyWithLogitsLoss": CrossEntropyWithLogits(),
 }
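`CrossEntropyWithLogits` folds the softmax into the loss: `forward` uses the log-sum-exp trick on raw logits, and `backward` reduces to `(softmax(y_hat) - y_true) / m`. A quick finite-difference check of that gradient, an illustrative sketch assuming the `nn.loss` module above is importable:

```python
# Compare the analytic gradient of CrossEntropyWithLogits against a
# finite-difference estimate on random logits (sketch, not part of the commit).
import numpy as np
from nn.loss import LOSSES

rng = np.random.default_rng(0)
y_hat = rng.normal(size=(4, 3))                  # raw logits
y_true = np.eye(3)[rng.integers(0, 3, size=4)]   # one-hot labels

loss_fn = LOSSES["CrossEntropyWithLogitsLoss"]
analytic = loss_fn.backward(y_hat, y_true)

eps = 1e-6
numeric = np.zeros_like(y_hat)
for i in range(y_hat.shape[0]):
    for j in range(y_hat.shape[1]):
        bumped = y_hat.copy()
        bumped[i, j] += eps
        numeric[i, j] = (loss_fn.forward(bumped, y_true) - loss_fn.forward(y_hat, y_true)) / eps

print(np.max(np.abs(analytic - numeric)))  # difference should be tiny (~1e-6 or less)
```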
nn/nn.py
CHANGED
@@ -1,55 +1,42 @@
-from nn.loss import LOSSES, Loss
+from dataclasses import dataclass, field
+import gradio as gr
 import numpy as np

+from nn.activation import Activation, Relu, SoftMax
+from nn.loss import Loss


 DTYPE = np.float32


+@dataclass
 class NN:
-        # try to get activation function and loss function
-        act_fn = ACTIVATIONS.get(activation_fn, None)
-        if act_fn is None:
-            raise KeyError(f"Invalid Activation function '{activation_fn}'")
-        loss_fn = LOSSES.get(loss_fn, None)
-        if loss_fn is None:
-            raise KeyError(f"Invalid Activation function '{activation_fn}'")
-        self._activation_fn: Activation = act_fn
-        self._loss_fn: Loss = loss_fn
-
-        self._loss_history = list()
-        self._weight_history = {
+    epochs: int
+    learning_rate: float
+    hidden_size: int
+    input_size: int
+    output_size: int
+    hidden_activation_fn: Activation
+    activation_fn: Activation
+    loss_fn: Loss
+    seed: int
+
+    _loss_history: list = field(default_factory=lambda: [], init=False)
+    _wo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _wh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _bo: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _bh: np.ndarray = field(default_factory=lambda: np.ndarray([]), init=False)
+    _weight_history: dict[str, list[np.ndarray]] = field(
+        default_factory=lambda: {
             "wo": [],
             "wh": [],
             "bo": [],
             "bh": [],
-        }
+        },
+        init=False,
+    )

-        self._wh: Optional[np.ndarray] = None
-        self._bo: Optional[np.ndarray] = None
-        self._bh: Optional[np.ndarray] = None
+    def __post_init__(self) -> None:
         self._init_weights_and_biases()

     def _init_weights_and_biases(self) -> None:
@@ -79,28 +66,37 @@
         )
         return

+    # def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+    #     # Determine the activation function for the hidden layer
+    #     if self._activation_fn.__class__.__name__ == "SoftMax":
+    #         # Using ReLU for hidden layer when softmax is used in output layer
+    #         hidden_layer_activation = Sigmoid()
+    #     else:
+    #         # Use the specified activation function if not using softmax
+    #         hidden_layer_activation = self._activation_fn
+
+    #     # Compute the hidden layer output
+    #     hidden_layer_output = hidden_layer_activation.forward(
+    #         np.dot(X_train, self._wh) + self._bh
+    #     )
+
+    #     # Compute the output layer (prediction layer) using the specified activation function
+    #     y_hat = self._activation_fn.forward(
+    #         np.dot(hidden_layer_output, self._wo) + self._bo
+    #     )
+
+    #     return y_hat, hidden_layer_output
+
+    # TODO: make this forward function the main _forward function if
+    # the loss function that the user selected is a "logits" loss. Call
+    # the one above if it is not.
     def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
-        """
-        params:
-            X_train: np.ndarray -> data that we are training the NN on.
-
-        returns:
-            output layer np array containing the predicted outputs calculated using
-            the weights and biases of the current epoch.
-        """
-        assert self._activation_fn is not None
-
-        # hidden layer
-        hidden_layer_output = self._activation_fn.forward(
-            np.dot(X_train, self._wh) + self._bh
+        hidden_layer_output = self.hidden_activation_fn.forward(
+            np.dot(X_train, self._wh) + self._bh,
         )
-        return y_hat, hidden_layer_output
+        # Output layer does not apply softmax anymore, just return logits
+        logits = np.dot(hidden_layer_output, self._wo) + self._bo
+        return logits, hidden_layer_output

     def _backward(
         self,
@@ -109,22 +105,18 @@
         y_train: np.ndarray,
         hidden_output: np.ndarray,
     ) -> None:
-        assert self._activation_fn is not None
         assert self._wo is not None
-        assert self._loss_fn is not None

         # Calculate the error at the output
         # This should be the derivative of the loss function with respect to the output of the network
-        error_output = self._loss_fn.backward(
-            y_hat, y_train
-        ) * self._activation_fn.backward(y_hat)
+        error_output = self.loss_fn.backward(y_hat, y_train)

         # Calculate gradients for output layer weights and biases
         wo_prime = np.dot(hidden_output.T, error_output) * self.learning_rate
         bo_prime = np.sum(error_output, axis=0, keepdims=True) * self.learning_rate

         # Propagate the error back to the hidden layer
-        error_hidden = np.dot(error_output, self._wo.T) * self._activation_fn.backward(
+        error_hidden = np.dot(error_output, self._wo.T) * self.activation_fn.backward(
             hidden_output
         )

@@ -132,18 +124,25 @@
         wh_prime = np.dot(X_train.T, error_hidden) * self.learning_rate
         bh_prime = np.sum(error_hidden, axis=0, keepdims=True) * self.learning_rate

+        # Gradient clipping to prevent overflow
+        max_norm = 1.0  # You can adjust this threshold
+        wo_prime = np.clip(wo_prime, -max_norm, max_norm)
+        bo_prime = np.clip(bo_prime, -max_norm, max_norm)
+        wh_prime = np.clip(wh_prime, -max_norm, max_norm)
+        bh_prime = np.clip(bh_prime, -max_norm, max_norm)
+
         # Update weights and biases
         self._wo -= wo_prime
         self._wh -= wh_prime
         self._bo -= bo_prime
         self._bh -= bh_prime

+    # TODO: implement batch size in training, this will speed up the training loop
+    # quite a bit I believe
     def train(self, X_train: np.ndarray, y_train: np.ndarray) -> "NN":
-        assert self._loss_fn is not None
-
         for _ in gr.Progress().tqdm(range(self.epochs)):
             y_hat, hidden_output = self._forward(X_train=X_train)
+            loss = self.loss_fn.forward(y_hat=y_hat, y_true=y_train)
             self._loss_history.append(loss)
             self._backward(
                 X_train=X_train,
@@ -152,12 +151,15 @@
                 hidden_output=hidden_output,
             )

+            # TODO: make a 3d visualization traversing the loss plane. Might be too
+            # expensive to do though.
             # keep track of weights and biases at each epoch for visualization
-            self._weight_history["wo"].append(self._wo[0, 0])
-            self._weight_history["wh"].append(self._wh[0, 0])
-            self._weight_history["bo"].append(self._bo[0, 0])
-            self._weight_history["bh"].append(self._bh[0, 0])
+            # self._weight_history["wo"].append(self._wo[0, 0])
+            # self._weight_history["wh"].append(self._wh[0, 0])
+            # self._weight_history["bo"].append(self._bo[0, 0])
+            # self._weight_history["bh"].append(self._bh[0, 0])
         return self

     def predict(self, X_test: np.ndarray) -> np.ndarray:
+        pred, _ = self._forward(X_test)
+        return self.activation_fn.forward(pred)
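With `NN` rewritten as a dataclass, the caller wires in `Activation` and `Loss` instances directly (app.py does this through the `ACTIVATIONS` and `LOSSES` registries). A minimal construction sketch on a toy XOR problem, illustrative only; note that `train()` wraps its loop in `gr.Progress()`, so it is normally invoked from inside the Gradio app:

```python
# Illustrative direct construction of the dataclass-style NN (not part of the commit).
import numpy as np
from nn.nn import NN
from nn.activation import Relu, SoftMax
from nn.loss import CrossEntropyWithLogits

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y = np.array([[1, 0], [0, 1], [0, 1], [1, 0]], dtype=np.float32)  # one-hot XOR labels

net = NN(
    epochs=2_000,
    learning_rate=0.1,
    hidden_size=8,
    input_size=2,
    output_size=2,
    hidden_activation_fn=Relu(),
    activation_fn=SoftMax(),
    loss_fn=CrossEntropyWithLogits(),
    seed=0,
)
net.train(X_train=X, y_train=y)
print(net.predict(X_test=X).argmax(axis=1))  # ideally [0, 1, 1, 0] once trained
```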
nn/test.py
DELETED
@@ -1,30 +0,0 @@
-from nn.nn import NN
-import unittest
-
-TEST_NN = NN(
-    epochs=100,
-    learning_rate=0.001,
-    hidden_size=8,
-    input_size=2,
-    output_size=1,
-    activation_fn="Sigmoid",
-    loss_fn="MSE",
-)
-
-
-class TestNN(unittest.TestCase):
-    def test_init_w_b(self) -> None:
-        return
-
-    def test_forward(self) -> None:
-        return
-
-    def test_backward(self) -> None:
-        return
-
-    def test_train(self) -> None:
-        return
-
-
-if __name__ == "__main__":
-    unittest.main()
vis.py
CHANGED
@@ -1,20 +1,70 @@
+import matplotlib
 from sklearn import datasets
+import plotly.graph_objects as go
+import plotly.express as px
+import matplotlib.pyplot as plt
+import matplotlib
 import numpy as np
-        size="petal_length",
-        size_max=18,
-    )
-    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
+
+matplotlib.use("Agg")
+
+
+def show_digits():
+    digits = datasets.load_digits()
+    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
+    for ax, image, label in zip(axes, digits.images, digits.target):
+        ax.set_axis_off()
+        ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
+        ax.set_title("Training: %i" % label)
     return fig
+
+
+def loss_history_plt(loss_history: list[float], loss_fn_name: str):
+    return px.line(
+        x=[i for i in range(len(loss_history))],
+        y=loss_history,
+        title=f"{loss_fn_name} Loss vs. Training Epoch",
+        labels={
+            "x": "Epochs",
+            "y": f"{loss_fn_name} Loss",
+        },
+    )
+
+
+def hits_and_misses(y_pred: np.ndarray, y_true: np.ndarray):
+    # decode the one hot encoded predictions
+    y_pred_decoded = np.argmax(y_pred, axis=1)
+    y_true_decoded = np.argmax(y_true, axis=1)
+
+    hits = y_pred_decoded == y_true_decoded
+    color = np.where(hits, "Hit", "Miss")
+    hover_text = [
+        "True: " + str(y_true_decoded[i]) + ", Pred: " + str(y_pred_decoded[i])
+        for i in range(len(y_pred_decoded))
+    ]
+
+    return px.scatter(
+        x=np.arange(len(y_pred_decoded)),
+        y=y_true_decoded,
+        color=color,
+        title="Hits and Misses of Predictions",
+        labels={
+            "color": "Prediction Correctness",
+            "x": "Sample Index",
+            "y": "True Label",
+        },
+        color_discrete_map={"Hit": "blue", "Miss": "red"},
+        hover_name=hover_text,
+    )
+
+
+def make_confidence_label(y_pred: np.ndarray, y_test: np.ndarray):
+    # decode the one hot encoded predictions
+    y_pred_labels = np.argmax(y_pred, axis=1)
+    y_test_labels = np.argmax(y_test, axis=1)
+    confidence_dict: dict[str, float] = {}
+    for idx, class_name in enumerate([str(i) for i in range(10)]):
+        class_confidences_idxs = np.where(y_test_labels == idx)[0]
+        class_confidences = y_pred[class_confidences_idxs, idx]
+        confidence_dict[class_name] = float(np.mean(class_confidences))
+    return confidence_dict
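All three new helpers consume the same one-hot and probability arrays that `NN.predict` produces, so they can be exercised with synthetic data. A small sketch, illustrative only and assuming the `vis` module above is importable:

```python
# Drive the new visualization helpers with synthetic predictions (sketch only).
import numpy as np
from vis import hits_and_misses, loss_history_plt, make_confidence_label

rng = np.random.default_rng(0)
y_true = np.eye(10)[rng.integers(0, 10, size=200)]   # one-hot ground truth
logits = rng.normal(size=(200, 10)) + 3.0 * y_true   # mostly-correct scores
y_pred = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # softmax probabilities

scatter_fig = hits_and_misses(y_pred=y_pred, y_true=y_true)  # plotly scatter of hits vs. misses
loss_fig = loss_history_plt(
    loss_history=list(np.linspace(2.3, 0.3, 100)),
    loss_fn_name="CrossEntropy",
)
print(make_confidence_label(y_pred=y_pred, y_test=y_true))   # mean confidence per digit class
```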