# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn as nn
import torch.nn.functional as F


class MLP(nn.Module):
    def __init__(
        self,
        dim: int,
        hidden_dim: int,
    ):
        """
        Initializes the multilayer perceptron (MLP) module.

        Args:
            dim: The input and output dimensionality.
            hidden_dim: The dimensionality of the hidden layer.
        """
        super().__init__()
        self.w1 = nn.Linear(dim, hidden_dim, bias=False)
        self.w2 = nn.Linear(hidden_dim, dim, bias=False)
        self.w3 = nn.Linear(dim, hidden_dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Performs the forward pass of the MLP module.

        Args:
            x: The input tensor of shape (batch_size, dim).

        Returns:
            The output tensor of shape (batch_size, dim).
        """
        output = self.w2(F.silu(self.w1(x)) * self.w3(x))
        return output
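

# Minimal usage sketch (illustrative only; the dimensions below are assumed,
# not taken from any surrounding configuration). The forward pass implements
# the gated SwiGLU-style projection w2(silu(w1(x)) * w3(x)), so the output
# keeps the same trailing dimension as the input.
if __name__ == "__main__":
    mlp = MLP(dim=512, hidden_dim=2048)
    x = torch.randn(4, 512)  # (batch_size, dim)
    y = mlp(x)               # (batch_size, dim)
    assert y.shape == (4, 512)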