Jensen-holm committed • 9e506b7
Parent(s): 880505a

moving on to testing other datasets!
Files changed:
- .gitignore +1 -0
- app.py +1 -1
- example/main.py +5 -5
- nn/backprop.py → example/test.py +0 -0
- nn/activation.py +3 -6
- nn/nn.py +10 -3
- nn/train.py +51 -22
.gitignore
CHANGED
@@ -186,3 +186,4 @@ cython_debug/
 #.idea/
 
 *.swp
+.vscode
app.py
CHANGED
@@ -31,7 +31,7 @@ def neural_net():
 
     try:
         activation.get_activation(nn=net)
-    except Exception
+    except Exception:
        return Response(
            response="invalid activation function",
            status=400,
example/main.py
CHANGED
@@ -4,11 +4,11 @@ with open("iris.csv", "rb") as csv:
     iris_data = csv.read()
 
 ARGS = {
-    "epochs":
-    "hidden_size":
-    "learning_rate": 0.
-    "test_size": 0.
-    "activation": "
+    "epochs": 10000,
+    "hidden_size": 8,
+    "learning_rate": 0.0001,
+    "test_size": 0.1,
+    "activation": "relu",
     "features": ["sepal width", "sepal length", "petal width", "petal length"],
     "target": "species",
     "data": iris_data.decode("utf-8"),
nn/backprop.py → example/test.py
RENAMED
File without changes
nn/activation.py
CHANGED
@@ -22,14 +22,11 @@ def get_activation(nn: NN) -> Callable:
 
 
 def relu(x):
-    return np.
+    return np.maximum(0.0, x)
 
 
 def relu_prime(x):
-
-        return 1
-    else:
-        return 0
+    return np.maximum(0, x)
 
 
 def sigmoid(x):
@@ -38,7 +35,7 @@ def sigmoid(x):
 
 def sigmoid_prime(x):
     s = sigmoid(x)
-    return s
+    return s * (1 - s)
 
 
 def tanh(x):
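Note: the new relu_prime returns np.maximum(0, x), which is ReLU itself rather than its derivative; the usual derivative is 1 for positive inputs and 0 otherwise. A minimal NumPy sketch of that conventional variant (an illustration, not code from this commit):

import numpy as np

def relu_prime(x):
    # gradient of ReLU: 1 where x > 0, 0 elsewhere
    return (x > 0).astype(float)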
nn/nn.py
CHANGED
@@ -29,16 +29,23 @@ class NN:
|
|
29 |
self.df: pd.DataFrame = None
|
30 |
self.X: pd.DataFrame = None
|
31 |
self.y: pd.DataFrame = None
|
|
|
|
|
|
|
32 |
|
33 |
def set_df(self, df: pd.DataFrame) -> None:
|
|
|
|
|
|
|
|
|
34 |
assert isinstance(df, pd.DataFrame)
|
35 |
self.df = df
|
36 |
-
|
37 |
-
y = df[self.target]
|
38 |
x = df[self.features]
|
39 |
-
self.
|
40 |
self.X = pd.get_dummies(x, columns=self.features)
|
41 |
self.input_size = len(self.X.columns)
|
|
|
42 |
|
43 |
def set_func(self, f: Callable) -> None:
|
44 |
assert isinstance(f, Callable)
|
|
|
29 |
self.df: pd.DataFrame = None
|
30 |
self.X: pd.DataFrame = None
|
31 |
self.y: pd.DataFrame = None
|
32 |
+
self.y_dummy: pd.DataFrame = None
|
33 |
+
self.input_size: int = None
|
34 |
+
self.output_size: int = None
|
35 |
|
36 |
def set_df(self, df: pd.DataFrame) -> None:
|
37 |
+
|
38 |
+
# issue right now here because we need a way to convert
|
39 |
+
# back and forth from dummies and non dummy vars
|
40 |
+
|
41 |
assert isinstance(df, pd.DataFrame)
|
42 |
self.df = df
|
43 |
+
self.y = df[self.target]
|
|
|
44 |
x = df[self.features]
|
45 |
+
self.y_dummy = pd.get_dummies(self.y, columns=self.target)
|
46 |
self.X = pd.get_dummies(x, columns=self.features)
|
47 |
self.input_size = len(self.X.columns)
|
48 |
+
self.output_size = len(self.y_dummy.columns)
|
49 |
|
50 |
def set_func(self, f: Callable) -> None:
|
51 |
assert isinstance(f, Callable)
|
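On the in-diff comment about converting back and forth between dummy and non-dummy variables: pandas can invert a one-hot encoding with idxmax. A small sketch, assuming the dummies came from pd.get_dummies on the target column (the variable names here are illustrative, not from the repo):

import pandas as pd

species = pd.Series(["setosa", "versicolor", "setosa"], name="species")
dummies = pd.get_dummies(species)       # one 0/1 column per class
recovered = dummies.idxmax(axis=1)      # column name of the max value in each row
assert (recovered == species).all()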
nn/train.py
CHANGED
@@ -1,32 +1,36 @@
 from sklearn.model_selection import train_test_split
+from sklearn.metrics import log_loss, accuracy_score, f1_score
 from typing import Callable
 from nn.nn import NN
 import numpy as np
 
 
-def init_weights_biases(nn: NN)
+def init_weights_biases(nn: NN):
+    # np.random.seed(0)
     bh = np.zeros((1, nn.hidden_size))
-    bo = np.zeros((1,
+    bo = np.zeros((1, nn.output_size))
     wh = np.random.randn(nn.input_size, nn.hidden_size) * \
         np.sqrt(2 / nn.input_size)
-    wo = np.random.randn(nn.hidden_size,
+    wo = np.random.randn(nn.hidden_size, nn.output_size) * \
+        np.sqrt(2 / nn.hidden_size)
     return wh, wo, bh, bo
 
 
 def train(nn: NN) -> dict:
     wh, wo, bh, bo = init_weights_biases(nn=nn)
     X_train, X_test, y_train, y_test = train_test_split(
-        nn.X,
-        nn.
+        nn.X.to_numpy(),
+        nn.y_dummy.to_numpy(),
         test_size=nn.test_size,
+        # random_state=0,
     )
 
-
+    ce: float = 0.0
     loss_hist: list[float] = []
     for _ in range(nn.epochs):
         # compute hidden output
         hidden_output = compute_node(
-            data=X_train
+            data=X_train,
             weights=wh,
             biases=bh,
             func=nn.func,
@@ -41,18 +45,43 @@ def train(nn: NN) -> dict:
         )
         # compute error & store it
         error = y_hat - y_train
-        mse = mean_squared_error(y_train, y_hat)
+        mse = mean_squared_error(y=y_train, y_hat=y_hat)
         loss_hist.append(mse)
 
+        # compute derivatives of weights & biases
         # update weights & biases using gradient descent after
         # computing derivatives.
-
-
-
-
+        dwo = nn.learning_rate * output_weight_prime(hidden_output, error)
+
+        # Use NumPy to sum along the first axis (axis=0)
+        # and then reshape to match the shape of bo
+        dbo = nn.learning_rate * np.sum(output_bias_prime(error), axis=0)
+
+        dhidden = np.dot(error, wo.T) * nn.func_prime(hidden_output)
+        dwh = nn.learning_rate * hidden_weight_prime(X_train, dhidden)
+        dbh = nn.learning_rate * hidden_bias_prime(dhidden)
+
+        wh -= dwh
+        wo -= dwo
+        bh -= dbh
+        bo -= dbo
+
+    # compute final predictions on data not seen
+    hidden_output_test = compute_node(
+        data=X_test,
+        weights=wh,
+        biases=bh,
+        func=nn.func,
+    )
+    y_hat = compute_node(
+        data=hidden_output_test,
+        weights=wo,
+        biases=bo,
+        func=nn.func,
+    )
+
     return {
-        "
-        "loss_hist": loss_hist,
+        "log loss": log_loss(y_true=y_test, y_pred=y_hat)
     }
 
 
@@ -64,17 +93,17 @@ def mean_squared_error(y: np.array, y_hat: np.array) -> np.array:
     return np.mean((y - y_hat) ** 2)
 
 
-def hidden_weight_prime(data, error):
-    return np.dot(data.T, error)
-
-
-def output_weight_prime(hidden_output, error):
-    return np.dot(hidden_output.T, error)
-
-
 def hidden_bias_prime(error):
     return np.sum(error, axis=0)
 
 
 def output_bias_prime(error):
     return np.sum(error, axis=0)
+
+
+def hidden_weight_prime(data, error):
+    return np.dot(data.T, error)
+
+
+def output_weight_prime(hidden_output, error):
+    return np.dot(hidden_output.T, error)