from abc import ABC, abstractmethod

import numpy as np

from nn.activation import SoftMax


class Loss(ABC):
    """Interface for losses: `forward` returns the scalar loss value and
    `backward` the gradient of that loss w.r.t. the predictions `y_hat`."""

    @abstractmethod
    def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> float:
        pass

    @abstractmethod
    def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        pass


class LogitsLoss(Loss):
    """Marker base class for losses that consume raw (pre-softmax) logits
    directly instead of probabilities."""

    pass
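
# Note: downstream code is presumably meant to check `isinstance(loss, LogitsLoss)`
# to know that the network's final layer should emit raw logits (no softmax).
# That usage is an assumption about the surrounding framework, not shown here.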


class MSE(Loss):
    """Mean squared error: squared errors are summed over all output
    dimensions and averaged over the batch."""

    def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> float:
        return np.sum(np.square(y_hat - y_true)) / y_true.shape[0]

    def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        # d/dy_hat of sum((y_hat - y_true)**2) / m is 2 * (y_hat - y_true) / m.
        return (y_hat - y_true) * (2 / y_true.shape[0])
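
# A quick worked example of MSE (illustrative values):
# with y_hat = [[0., 2.]] and y_true = [[1., 1.]], m = y_true.shape[0] = 1, so
#   forward  -> ((0 - 1)**2 + (2 - 1)**2) / 1 = 2.0
#   backward -> (2 / 1) * [[-1., 1.]]         = [[-2., 2.]]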


class CrossEntropy(Loss):
    """Cross-entropy loss that applies a softmax to its inputs, so `y_hat`
    is expected to hold raw scores (logits)."""

    def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> float:
        y_hat = np.asarray(y_hat)
        y_true = np.asarray(y_true)
        m = y_true.shape[0]
        p = self._softmax(y_hat)
        eps = 1e-15  # to prevent log(0)
        # Negative log of each row's predicted probability for the true class.
        log_likelihood = -np.log(
            np.clip(p[range(m), y_true.argmax(axis=1)], a_min=eps, a_max=None)
        )
        loss = np.sum(log_likelihood) / m
        return loss

    def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        y_hat = np.asarray(y_hat)
        y_true = np.asarray(y_true)
        # Gradient w.r.t. the logits with the softmax folded in. The softmax
        # probabilities (not the raw inputs) enter the formula, matching the
        # softmax applied in `forward`; see the derivation note after this class.
        probs = self._softmax(y_hat)
        grad = probs - y_true
        return grad / y_true.shape[0]

    @staticmethod
    def _softmax(X: np.ndarray) -> np.ndarray:
        return SoftMax().forward(X)
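
# Why `backward` returns (p - y) / m: for a single sample with logits z,
# p_i = exp(z_i) / sum_j exp(z_j) and L = -sum_i y_i * log(p_i), so
#   dL/dz_k = sum_i y_i * (p_k - [i == k]) = p_k - y_k   (y is one-hot).
# Averaging over a batch of m samples gives (p - y) / m.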


class CrossEntropyWithLogits(LogitsLoss):
    """Numerically stable cross-entropy computed directly from raw logits."""

    def forward(self, y_hat: np.ndarray, y_true: np.ndarray) -> float:
        # Apply the log-sum-exp trick for numerical stability:
        # log softmax(z) = z - max(z) - log(sum(exp(z - max(z)))).
        max_logits = np.max(y_hat, axis=1, keepdims=True)
        log_sum_exp = np.log(np.sum(np.exp(y_hat - max_logits), axis=1, keepdims=True))
        log_probs = y_hat - max_logits - log_sum_exp
        # Select the log-probability of the true class via the one-hot labels.
        loss = -np.sum(log_probs * y_true) / y_true.shape[0]
        return loss

    def backward(self, y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        # Compute softmax probabilities from the max-shifted logits.
        exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
        probs = exps / np.sum(exps, axis=1, keepdims=True)
        # Same softmax + cross-entropy gradient as above: (p - y) / m.
        grad = (probs - y_true) / y_true.shape[0]
        return grad


LOSSES: dict[str, Loss] = {
    "MSE": MSE(),
    "CrossEntropy": CrossEntropy(),
    "CrossEntropyWithLogitsLoss": CrossEntropyWithLogits(),
}
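

if __name__ == "__main__":
    # Minimal smoke test, illustrative only: verify that the analytic
    # gradient of CrossEntropyWithLogits matches a finite-difference
    # estimate on random data (uses only names defined in this module).
    rng = np.random.default_rng(0)
    logits = rng.normal(size=(4, 3))
    targets = np.eye(3)[rng.integers(0, 3, size=4)]  # one-hot labels

    loss_fn = CrossEntropyWithLogits()
    analytic = loss_fn.backward(logits, targets)

    eps = 1e-6
    numeric = np.zeros_like(logits)
    for i in range(logits.shape[0]):
        for j in range(logits.shape[1]):
            bumped = logits.copy()
            bumped[i, j] += eps
            numeric[i, j] = (
                loss_fn.forward(bumped, targets) - loss_fn.forward(logits, targets)
            ) / eps

    assert np.allclose(analytic, numeric, atol=1e-4), "gradient check failed"
    print("CrossEntropyWithLogits gradient check passed")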