import math

import torch
import torch.nn as nn


class GELU(nn.Module):
    """
    GELU activation (tanh approximation).
    Paper Section 3.4, last paragraph, notes that BERT uses GELU rather than ReLU.
    """

    def forward(self, x):
        return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
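
# Sanity-check sketch (not part of the original code): in recent PyTorch versions,
# the built-in GELU with approximate="tanh" uses the same formula, so the two
# should agree to floating-point precision. The tolerance below is an assumption.
if __name__ == "__main__":
    _x = torch.randn(4, 8)
    print(torch.allclose(GELU()(_x), nn.functional.gelu(_x, approximate="tanh"), atol=1e-6))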


class PositionwiseFeedForward(nn.Module):
    "Implements the position-wise feed-forward network (FFN) equation."

    def __init__(self, d_model, d_ff, dropout=0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = GELU()  # Want to try with ReLU

    def forward(self, x):
        return self.w_2(self.dropout(self.activation(self.w_1(x))))
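
# Usage sketch (illustrative, not part of the original code): the FFN is applied
# position-wise, so it preserves the (batch, seq_len, d_model) shape. The sizes
# below are arbitrary assumptions.
if __name__ == "__main__":
    _ffn = PositionwiseFeedForward(d_model=16, d_ff=64)
    print(_ffn(torch.randn(2, 5, 16)).shape)  # torch.Size([2, 5, 16])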


class LayerNorm(nn.Module):
    "Construct a layernorm module (see citation for details)."

    def __init__(self, features, eps=1e-6):
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))   # learnable gain
        self.b_2 = nn.Parameter(torch.zeros(features))  # learnable bias
        self.eps = eps

    def forward(self, x):
        # Normalize over the last (feature) dimension, then rescale and shift.
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
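
# Comparison sketch (assumption-level, not from the original code): this
# hand-rolled LayerNorm differs slightly from nn.LayerNorm, which uses the
# biased variance and adds eps inside the square root, whereas x.std(-1) above
# is unbiased and eps is added to the std. Outputs are close but not identical.
if __name__ == "__main__":
    _x = torch.randn(2, 5, 16)
    _diff = (LayerNorm(16)(_x) - nn.LayerNorm(16, eps=1e-6)(_x)).abs().max().item()
    print(f"max abs difference vs nn.LayerNorm: {_diff:.2e}")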


class SublayerConnection(nn.Module):
    """
    A residual connection followed by a layer norm.
    Note: for code simplicity, the norm is applied first (pre-norm) rather than last.
    """

    def __init__(self, size, dropout):
        super().__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        "Apply a residual connection to any sublayer with the same size."
        return x + self.dropout(sublayer(self.norm(x)))
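
# Usage sketch (illustrative assumption, not from the original code): wrapping the
# feed-forward block in a SublayerConnection gives the pre-norm residual pattern
# x + dropout(ffn(norm(x))). Sizes are arbitrary.
if __name__ == "__main__":
    _block = SublayerConnection(size=16, dropout=0.1)
    _ffn = PositionwiseFeedForward(d_model=16, d_ff=64)
    _x = torch.randn(2, 5, 16)
    print(_block(_x, _ffn).shape)  # torch.Size([2, 5, 16])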