Jensen-holm committed • 9e506b7
Parent(s): 880505a

moving on to testing other datasets!
Files changed:
- .gitignore +1 -0
- app.py +1 -1
- example/main.py +5 -5
- nn/backprop.py → example/test.py +0 -0
- nn/activation.py +3 -6
- nn/nn.py +10 -3
- nn/train.py +51 -22
.gitignore
CHANGED
@@ -186,3 +186,4 @@ cython_debug/
 #.idea/
 
 *.swp
+.vscode
app.py
CHANGED
@@ -31,7 +31,7 @@ def neural_net():
 
     try:
         activation.get_activation(nn=net)
-    except Exception
+    except Exception:
        return Response(
            response="invalid activation function",
            status=400,
example/main.py
CHANGED
@@ -4,11 +4,11 @@ with open("iris.csv", "rb") as csv:
     iris_data = csv.read()
 
 ARGS = {
-    "epochs":
-    "hidden_size":
-    "learning_rate": 0.
-    "test_size": 0.
-    "activation": "
+    "epochs": 10000,
+    "hidden_size": 8,
+    "learning_rate": 0.0001,
+    "test_size": 0.1,
+    "activation": "relu",
     "features": ["sepal width", "sepal length", "petal width", "petal length"],
     "target": "species",
     "data": iris_data.decode("utf-8"),
nn/backprop.py → example/test.py
RENAMED
File without changes
nn/activation.py
CHANGED
@@ -22,14 +22,11 @@ def get_activation(nn: NN) -> Callable:
 
 
 def relu(x):
-    return np.
+    return np.maximum(0.0, x)
 
 
 def relu_prime(x):
-
-        return 1
-    else:
-        return 0
+    return np.maximum(0, x)
 
 
 def sigmoid(x):
@@ -38,7 +35,7 @@ def sigmoid(x):
 
 def sigmoid_prime(x):
     s = sigmoid(x)
-    return s
+    return s * (1 - s)
 
 
 def tanh(x):
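Note: the new relu_prime returns np.maximum(0, x), which is ReLU itself rather than its derivative; the usual derivative is 1 for positive inputs and 0 otherwise. A minimal NumPy sketch of that conventional variant (an illustration, not code from this commit):

import numpy as np

def relu_prime(x):
    # gradient of ReLU: 1 where x > 0, 0 elsewhere
    return (x > 0).astype(float)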
nn/nn.py
CHANGED
@@ -29,16 +29,23 @@ class NN:
|
|
29 |
self.df: pd.DataFrame = None
|
30 |
self.X: pd.DataFrame = None
|
31 |
self.y: pd.DataFrame = None
|
|
|
|
|
|
|
32 |
|
33 |
def set_df(self, df: pd.DataFrame) -> None:
|
|
|
|
|
|
|
|
|
34 |
assert isinstance(df, pd.DataFrame)
|
35 |
self.df = df
|
36 |
-
|
37 |
-
y = df[self.target]
|
38 |
x = df[self.features]
|
39 |
-
self.
|
40 |
self.X = pd.get_dummies(x, columns=self.features)
|
41 |
self.input_size = len(self.X.columns)
|
|
|
42 |
|
43 |
def set_func(self, f: Callable) -> None:
|
44 |
assert isinstance(f, Callable)
|
|
|
29 |
self.df: pd.DataFrame = None
|
30 |
self.X: pd.DataFrame = None
|
31 |
self.y: pd.DataFrame = None
|
32 |
+
self.y_dummy: pd.DataFrame = None
|
33 |
+
self.input_size: int = None
|
34 |
+
self.output_size: int = None
|
35 |
|
36 |
def set_df(self, df: pd.DataFrame) -> None:
|
37 |
+
|
38 |
+
# issue right now here because we need a way to convert
|
39 |
+
# back and forth from dummies and non dummy vars
|
40 |
+
|
41 |
assert isinstance(df, pd.DataFrame)
|
42 |
self.df = df
|
43 |
+
self.y = df[self.target]
|
|
|
44 |
x = df[self.features]
|
45 |
+
self.y_dummy = pd.get_dummies(self.y, columns=self.target)
|
46 |
self.X = pd.get_dummies(x, columns=self.features)
|
47 |
self.input_size = len(self.X.columns)
|
48 |
+
self.output_size = len(self.y_dummy.columns)
|
49 |
|
50 |
def set_func(self, f: Callable) -> None:
|
51 |
assert isinstance(f, Callable)
|
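On the in-diff comment about converting back and forth between dummy and non-dummy variables: pandas can invert a one-hot encoding with idxmax. A small sketch, assuming the dummies came from pd.get_dummies on the target column (the variable names here are illustrative, not from the repo):

import pandas as pd

species = pd.Series(["setosa", "versicolor", "setosa"], name="species")
dummies = pd.get_dummies(species)       # one 0/1 column per class
recovered = dummies.idxmax(axis=1)      # column name of the max value in each row
assert (recovered == species).all()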
nn/train.py
CHANGED
@@ -1,32 +1,36 @@
 from sklearn.model_selection import train_test_split
+from sklearn.metrics import log_loss, accuracy_score, f1_score
 from typing import Callable
 from nn.nn import NN
 import numpy as np
 
 
-def init_weights_biases(nn: NN)
+def init_weights_biases(nn: NN):
+    # np.random.seed(0)
     bh = np.zeros((1, nn.hidden_size))
-    bo = np.zeros((1,
+    bo = np.zeros((1, nn.output_size))
     wh = np.random.randn(nn.input_size, nn.hidden_size) * \
         np.sqrt(2 / nn.input_size)
-    wo = np.random.randn(nn.hidden_size,
+    wo = np.random.randn(nn.hidden_size, nn.output_size) * \
+        np.sqrt(2 / nn.hidden_size)
     return wh, wo, bh, bo
 
 
 def train(nn: NN) -> dict:
     wh, wo, bh, bo = init_weights_biases(nn=nn)
     X_train, X_test, y_train, y_test = train_test_split(
-        nn.X,
-        nn.
+        nn.X.to_numpy(),
+        nn.y_dummy.to_numpy(),
         test_size=nn.test_size,
+        # random_state=0,
     )
 
-
+    ce: float = 0.0
     loss_hist: list[float] = []
     for _ in range(nn.epochs):
         # compute hidden output
         hidden_output = compute_node(
-            data=X_train
+            data=X_train,
             weights=wh,
             biases=bh,
             func=nn.func,
@@ -41,18 +45,43 @@ def train(nn: NN) -> dict:
         )
         # compute error & store it
         error = y_hat - y_train
-        mse = mean_squared_error(y_train, y_hat)
+        mse = mean_squared_error(y=y_train, y_hat=y_hat)
         loss_hist.append(mse)
 
+        # compute derivatives of weights & biases
         # update weights & biases using gradient descent after
         # computing derivatives.
-
-
-
-
+        dwo = nn.learning_rate * output_weight_prime(hidden_output, error)
+
+        # Use NumPy to sum along the first axis (axis=0)
+        # and then reshape to match the shape of bo
+        dbo = nn.learning_rate * np.sum(output_bias_prime(error), axis=0)
+
+        dhidden = np.dot(error, wo.T) * nn.func_prime(hidden_output)
+        dwh = nn.learning_rate * hidden_weight_prime(X_train, dhidden)
+        dbh = nn.learning_rate * hidden_bias_prime(dhidden)
+
+        wh -= dwh
+        wo -= dwo
+        bh -= dbh
+        bo -= dbo
+
+    # compute final predictions on data not seen
+    hidden_output_test = compute_node(
+        data=X_test,
+        weights=wh,
+        biases=bh,
+        func=nn.func,
+    )
+    y_hat = compute_node(
+        data=hidden_output_test,
+        weights=wo,
+        biases=bo,
+        func=nn.func,
+    )
+
     return {
-        "
-        "loss_hist": loss_hist,
+        "log loss": log_loss(y_true=y_test, y_pred=y_hat)
     }
 
 
@@ -64,17 +93,17 @@ def mean_squared_error(y: np.array, y_hat: np.array) -> np.array:
     return np.mean((y - y_hat) ** 2)
 
 
-def hidden_weight_prime(data, error):
-    return np.dot(data.T, error)
-
-
-def output_weight_prime(hidden_output, error):
-    return np.dot(hidden_output.T, error)
-
-
 def hidden_bias_prime(error):
     return np.sum(error, axis=0)
 
 
 def output_bias_prime(error):
     return np.sum(error, axis=0)
+
+
+def hidden_weight_prime(data, error):
+    return np.dot(data.T, error)
+
+
+def output_weight_prime(hidden_output, error):
+    return np.dot(hidden_output.T, error)