Jensen-holm committed
Commit 880505a
1 Parent(s): 84bbd7d

currently debugging mismatched shape error when computing weights and

Files changed (5):
  1. app.py +0 -1
  2. nn/activation.py +5 -2
  3. nn/nn.py +1 -17
  4. nn/train.py +37 -12
  5. requirements.txt +5 -3
app.py CHANGED
@@ -1,5 +1,4 @@
 from flask import Flask, request, jsonify, Response
-
 from nn.nn import NN
 from nn import train as train_nn
 from nn import activation
nn/activation.py CHANGED
@@ -26,7 +26,10 @@ def relu(x):
 
 
 def relu_prime(x):
-    return
+    if x > 0:
+        return 1
+    else:
+        return 0
 
 
 def sigmoid(x):
@@ -43,4 +46,4 @@ def tanh(x):
 
 
 def tanh_prime(x):
-    return
+    return 1 - np.tanh(x)**2
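
A side note on the new derivatives: compute_node in nn/train.py passes NumPy arrays through these functions, and the scalar `if x > 0:` branch raises "The truth value of an array with more than one element is ambiguous" when handed an array. A minimal vectorized sketch, assuming x arrives as an np.ndarray and that numpy is imported in nn/activation.py (not shown in this diff):

import numpy as np

def relu_prime(x):
    # elementwise ReLU derivative: 1.0 where x > 0, otherwise 0.0
    return np.where(x > 0, 1.0, 0.0)

def tanh_prime(x):
    # tanh'(x) = 1 - tanh(x)^2, works elementwise on arrays as well as scalars
    return 1 - np.tanh(x) ** 2

np.where keeps the same shape as x, so the derivative can be multiplied elementwise against a layer's pre-activations during backpropagation.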
nn/nn.py CHANGED
@@ -1,6 +1,5 @@
 from typing import Callable
 import pandas as pd
-import numpy as np
 
 
 class NN:
@@ -24,10 +23,7 @@ class NN:
         self.target = target
         self.data = data
 
-        self.wh: np.array = None
-        self.wo: np.array = None
-        self.bh: np.array = None
-        self.bo: np.array = None
+        self.loss_hist: list[float] = None
         self.func_prime: Callable = None
         self.func: Callable = None
         self.df: pd.DataFrame = None
@@ -52,18 +48,6 @@ class NN:
         assert isinstance(f, Callable)
         self.func_prime = f
 
-    def set_bh(self, bh: np.array) -> None:
-        self.bh = bh
-
-    def set_wh(self, wh: np.array) -> None:
-        self.wh = wh
-
-    def set_bo(self, bo: np.array) -> None:
-        self.bo = bo
-
-    def set_wo(self, wo: np.array) -> None:
-        self.wo = wo
-
     @classmethod
     def from_dict(cls, dct):
         """ Creates an instance of NN given a dictionary
nn/train.py CHANGED
@@ -1,7 +1,6 @@
 from sklearn.model_selection import train_test_split
 from typing import Callable
 from nn.nn import NN
-import pandas as pd
 import numpy as np
 
 
@@ -11,40 +10,50 @@ def init_weights_biases(nn: NN) -> None:
     wh = np.random.randn(nn.input_size, nn.hidden_size) * \
         np.sqrt(2 / nn.input_size)
     wo = np.random.randn(nn.hidden_size, 1) * np.sqrt(2 / nn.hidden_size)
-    nn.set_bh(bh)
-    nn.set_bo(bo)
-    nn.set_wh(wh)
-    nn.set_wo(wo)
+    return wh, wo, bh, bo
 
 
 def train(nn: NN) -> dict:
-    init_weights_biases(nn=nn)
+    wh, wo, bh, bo = init_weights_biases(nn=nn)
     X_train, X_test, y_train, y_test = train_test_split(
         nn.X,
         nn.y,
         test_size=nn.test_size,
     )
 
+    mse: float = 0.0
+    loss_hist: list[float] = []
     for _ in range(nn.epochs):
         # compute hidden output
        hidden_output = compute_node(
             data=X_train.to_numpy(),
-            weights=nn.wh,
-            biases=nn.bh,
+            weights=wh,
+            biases=bh,
             func=nn.func,
         )
 
         # compute output layer
         y_hat = compute_node(
             data=hidden_output,
-            weights=nn.wo,
-            biases=nn.bo,
+            weights=wo,
+            biases=bo,
             func=nn.func,
         )
-
+        # compute error & store it
+        error = y_hat - y_train
         mse = mean_squared_error(y_train, y_hat)
+        loss_hist.append(mse)
 
-    return {"mse": mse}
+        # update weights & biases using gradient descent after
+        # computing derivatives.
+        wh -= (nn.learning_rate * hidden_weight_prime(X_train, error))
+        wo -= (nn.learning_rate * output_weight_prime(hidden_output, error))
+        bh -= (nn.learning_rate * hidden_bias_prime(error))
+        bo -= (nn.learning_rate * output_bias_prime(error))
+    return {
+        "mse": mse,
+        "loss_hist": loss_hist,
+    }
 
 
 def compute_node(data: np.array, weights: np.array, biases: np.array, func: Callable) -> np.array:
@@ -53,3 +62,19 @@ def compute_node(data: np.array, weights: np.array, biases: np.array, func: Callable) -> np.array:
 
 def mean_squared_error(y: np.array, y_hat: np.array) -> np.array:
     return np.mean((y - y_hat) ** 2)
+
+
+def hidden_weight_prime(data, error):
+    return np.dot(data.T, error)
+
+
+def output_weight_prime(hidden_output, error):
+    return np.dot(hidden_output.T, error)
+
+
+def hidden_bias_prime(error):
+    return np.sum(error, axis=0)
+
+
+def output_bias_prime(error):
+    return np.sum(error, axis=0)
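
On the "mismatched shape error" named in the commit message: with wh of shape (input_size, hidden_size), wo of shape (hidden_size, 1), and error = y_hat - y_train of shape (n_samples, 1), hidden_weight_prime(X_train, error) comes out as (input_size, 1), which cannot be subtracted from wh, and hidden_bias_prime(error) has shape (1,) rather than (hidden_size,). The hidden-layer gradient has to be chained back through wo and the hidden activation's derivative. Below is a minimal sketch of the full chain-rule gradients for this one-hidden-layer MSE setup; it assumes compute_node computes func(data @ weights + biases) (its body is not shown in the diff), treats the output layer as linear for simplicity, and is not the fix chosen in this repository:

import numpy as np

def forward_backward(X, y, wh, bh, wo, bo, func, func_prime):
    # X: (n, input_size), y: (n, 1)
    zh = X @ wh + bh                 # hidden pre-activation, (n, hidden_size)
    hidden = func(zh)                # hidden output, (n, hidden_size)
    y_hat = hidden @ wo + bo         # linear output layer, (n, 1)

    n = X.shape[0]
    error = (y_hat - y) / n          # dMSE/dy_hat, up to a constant factor

    grad_wo = hidden.T @ error                      # (hidden_size, 1)   matches wo
    grad_bo = error.sum(axis=0)                     # (1,)               matches bo
    hidden_error = (error @ wo.T) * func_prime(zh)  # chain rule, (n, hidden_size)
    grad_wh = X.T @ hidden_error                    # (input_size, hidden_size) matches wh
    grad_bh = hidden_error.sum(axis=0)              # (hidden_size,)     matches bh
    return grad_wh, grad_bh, grad_wo, grad_bo

Plugged into the training loop, the updates would read wh -= nn.learning_rate * grad_wh and so on, with every gradient matching the (input_size, hidden_size) and (hidden_size, 1) matrices produced by init_weights_biases.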
requirements.txt CHANGED
@@ -1,3 +1,5 @@
-Flask==3.0.0
-pandas==2.1.1
-Requests==2.31.0
+Flask==2.2.3
+numpy==1.25.2
+pandas==1.5.3
+requests==2.28.2
+scikit_learn==1.3.1