yxaenbal committed
Commit d594f6a
1 Parent(s): 65ffa9d

add missing attributes from llama

Files changed (1): configuration_aquila.py (+10, -0)
configuration_aquila.py CHANGED
@@ -92,6 +92,10 @@ class AquilaConfig(PretrainedConfig):
         bos_token_id=1,
         eos_token_id=2,
         tie_word_embeddings=False,
+        num_key_value_heads=None,
+        rope_theta=10000.0,
+        rope_scaling=None,
+        pretraining_tp=1,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -100,10 +104,16 @@ class AquilaConfig(PretrainedConfig):
         self.intermediate_size = intermediate_size
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
         self.hidden_act = hidden_act
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
         self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.pretraining_tp = pretraining_tp
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,
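The added fields mirror the corresponding attributes in LlamaConfig: num_key_value_heads enables grouped-query attention (falling back to one KV head per attention head, i.e. plain multi-head attention, when left as None), rope_theta and rope_scaling parameterize the rotary position embeddings, and pretraining_tp records the tensor-parallel degree used during pretraining. A minimal sketch of how the updated config might be constructed, assuming configuration_aquila.py is on the import path; the concrete values below are illustrative, not from the commit:

# A minimal sketch, assuming configuration_aquila.py is importable.
# Attribute names come from the diff above; the values are illustrative.
from configuration_aquila import AquilaConfig

# Default construction: num_key_value_heads is None, so __init__ sets it
# to num_attention_heads and attention stays plain multi-head.
config = AquilaConfig()
assert config.num_key_value_heads == config.num_attention_heads

# Grouped-query attention: fewer key/value heads than query heads, plus a
# non-default RoPE base and a llama-style rope_scaling dict (the diff does
# not validate this dict, so its exact schema is an assumption here).
gqa_config = AquilaConfig(
    num_attention_heads=32,
    num_key_value_heads=8,  # 4 query heads share each KV head
    rope_theta=1000000.0,   # larger base, as used for longer contexts
    rope_scaling={"type": "linear", "factor": 2.0},
)
print(gqa_config.rope_theta, gqa_config.pretraining_tp)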