|
import torch
import torch.nn as nn


class RMSNorm(nn.Module):
    def __init__(
        self,
        dim: int,
        elementwise_affine: bool = True,
        eps: float = 1e-6,
        device=None,
        dtype=None,
    ):
""" |
|
Initialize the RMSNorm normalization layer. |
|
|
|
Args: |
|
dim (int): The dimension of the input tensor. |
|
eps (float, optional): A small value added to the denominator for numerical stability. Default is 1e-6. |
|
|
|
Attributes: |
|
eps (float): A small value added to the denominator for numerical stability. |
|
weight (nn.Parameter): Learnable scaling parameter. |
|
|
|
""" |
|
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        self.eps = eps
        # The scale parameter is only created when elementwise_affine is True;
        # forward() detects its absence via hasattr.
        if elementwise_affine:
            self.weight = nn.Parameter(torch.ones(dim, **factory_kwargs))
|
    def _norm(self, x):
        """
        Apply RMS normalization to the input tensor.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The normalized tensor.
        """
        # x / sqrt(mean(x^2) + eps) over the last dimension, using rsqrt
        # to avoid an explicit division.
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
|
    def forward(self, x):
        """
        Forward pass through the RMSNorm layer.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The output tensor after applying RMSNorm.
        """
        # Normalize in float32 for numerical stability, then cast back to
        # the input dtype before applying the optional learnable scale.
        output = self._norm(x.float()).type_as(x)
        if hasattr(self, "weight"):
            output = output * self.weight
        return output
|
|
def get_norm_layer(norm_layer):
    """
    Get the normalization layer class.

    Args:
        norm_layer (str): The type of normalization layer, either "layer" or "rms".

    Returns:
        type[nn.Module]: The normalization layer class (not an instance).
    """
    if norm_layer == "layer":
        return nn.LayerNorm
    elif norm_layer == "rms":
        return RMSNorm
    else:
        raise NotImplementedError(f"Norm layer {norm_layer} is not implemented")
|
|
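# Minimal usage sketch (illustrative only; the shapes and sizes below are
# arbitrary examples, not assumptions of the module itself).
if __name__ == "__main__":
    norm_cls = get_norm_layer("rms")  # resolves to the RMSNorm class
    norm = norm_cls(dim=512)          # learnable weight of shape (512,)
    x = torch.randn(2, 16, 512)
    y = norm(x)
    print(y.shape)  # torch.Size([2, 16, 512])
    # With the default all-ones weight, each feature vector of the output
    # has root-mean-square approximately 1.0.
    print(y.pow(2).mean(-1).sqrt()[0, 0])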