Crystalcareai
committed on
Commit
•
95f3d4b
1
Parent(s):
686fb4f
Update configuration_gemmoe.py
Browse files — configuration_gemmoe.py +4 -0
configuration_gemmoe.py
CHANGED
@@ -126,6 +126,8 @@ class GemmoeConfig(PretrainedConfig):
|
|
126 |
pad_token_id=0,
|
127 |
eos_token_id=1,
|
128 |
bos_token_id=2,
|
|
|
|
|
129 |
tie_word_embeddings=True,
|
130 |
rope_theta=10000.0,
|
131 |
attention_bias=False,
|
@@ -143,6 +145,8 @@ class GemmoeConfig(PretrainedConfig):
|
|
143 |
self.num_hidden_layers = num_hidden_layers
|
144 |
self.num_attention_heads = num_attention_heads
|
145 |
self.head_dim = head_dim
|
|
|
|
|
146 |
self.num_key_value_heads = num_key_value_heads
|
147 |
self.hidden_act = hidden_act
|
148 |
self.initializer_range = initializer_range
|
|
|
126 |
pad_token_id=0,
|
127 |
eos_token_id=1,
|
128 |
bos_token_id=2,
|
129 |
+
hidden_act="gelu_pytorch_tanh",
|
130 |
+
hidden_activation=None,
|
131 |
tie_word_embeddings=True,
|
132 |
rope_theta=10000.0,
|
133 |
attention_bias=False,
|
|
|
145 |
self.num_hidden_layers = num_hidden_layers
|
146 |
self.num_attention_heads = num_attention_heads
|
147 |
self.head_dim = head_dim
|
148 |
+
self.hidden_act = hidden_act
|
149 |
+
self.hidden_activation = hidden_activation
|
150 |
self.num_key_value_heads = num_key_value_heads
|
151 |
self.hidden_act = hidden_act
|
152 |
self.initializer_range = initializer_range
|