cognitivess committed
Commit 97409b9
1 Parent(s): 1736a41

Update cognitivess_model/configuration_cognitivess.py

cognitivess_model/configuration_cognitivess.py CHANGED
@@ -1,48 +1,33 @@
-from transformers import LlamaConfig
+# cognitivess_model/configuration_cognitivess.py
 
-class CognitivessConfig(LlamaConfig):
+from transformers import PretrainedConfig
+
+class CognitivessConfig(PretrainedConfig):
     model_type = "cognitivess"
 
-    def __init__(
-        self,
-        vocab_size=128256,
-        hidden_size=4096,
-        intermediate_size=14336,
-        num_hidden_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=8,
-        hidden_act="silu",
-        max_position_embeddings=8192,
-        initializer_range=0.02,
-        rms_norm_eps=1e-5,
-        use_cache=True,
-        pad_token_id=0,
-        bos_token_id=128000,
-        eos_token_id=128001,
-        tie_word_embeddings=False,
-        attention_dropout=0.0,
-        pretraining_tp=1,
-        rope_theta=500000.0,
-        **kwargs
-    ):
-        super().__init__(
-            vocab_size=vocab_size,
-            hidden_size=hidden_size,
-            intermediate_size=intermediate_size,
-            num_hidden_layers=num_hidden_layers,
-            num_attention_heads=num_attention_heads,
-            num_key_value_heads=num_key_value_heads,
-            hidden_act=hidden_act,
-            max_position_embeddings=max_position_embeddings,
-            initializer_range=initializer_range,
-            rms_norm_eps=rms_norm_eps,
-            use_cache=use_cache,
-            pad_token_id=pad_token_id,
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            tie_word_embeddings=tie_word_embeddings,
-            pretraining_tp=pretraining_tp,
-            rope_theta=rope_theta,
-            **kwargs
-        )
+    def __init__(self, hidden_size=4096, num_hidden_layers=32, num_attention_heads=32, intermediate_size=14336,
+                 hidden_act="silu", layer_norm_eps=1e-05, max_position_embeddings=8192, vocab_size=128256,
+                 bos_token_id=128000, eos_token_id=128001, pad_token_id=0, attention_dropout=0.0,
+                 attention_bias=False, tie_word_embeddings=False, mlp_bias=False, pretraining_tp=1,
+                 rope_scaling=None, rope_theta=500000.0, num_key_value_heads=8, use_cache=True, **kwargs):
+        super().__init__(**kwargs)
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.layer_norm_eps = layer_norm_eps
+        self.max_position_embeddings = max_position_embeddings
+        self.vocab_size = vocab_size
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
+        self.pad_token_id = pad_token_id
         self.attention_dropout = attention_dropout
+        self.attention_bias = attention_bias
+        self.tie_word_embeddings = tie_word_embeddings
+        self.mlp_bias = mlp_bias
+        self.pretraining_tp = pretraining_tp
+        self.rope_scaling = rope_scaling
+        self.rope_theta = rope_theta
+        self.num_key_value_heads = num_key_value_heads
+        self.use_cache = use_cache
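
A minimal usage sketch for the updated class, assuming the file above is importable from a local cognitivess_model package; the AutoConfig.register call and the "cognitivess-config" directory name are illustrative, not part of this commit:

from transformers import AutoConfig
from cognitivess_model.configuration_cognitivess import CognitivessConfig

# Map model_type "cognitivess" to this config class so AutoConfig can resolve it.
AutoConfig.register("cognitivess", CognitivessConfig)

# Instantiate with the defaults defined in the new __init__.
config = CognitivessConfig()
print(config.model_type, config.hidden_size, config.num_key_value_heads)  # cognitivess 4096 8

# Round-trip through config.json to confirm the attributes serialize and reload.
config.save_pretrained("cognitivess-config")
reloaded = AutoConfig.from_pretrained("cognitivess-config")
assert reloaded.rope_theta == 500000.0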