Fill-Mask
Transformers
PyTorch
Safetensors
English
nomic_bert
custom_code
zpn committed on
Commit 5c88019
1 Parent(s): 5c0d092

Update configuration_hf_nomic_bert.py

Files changed (1)
  1. configuration_hf_nomic_bert.py +8 -3
configuration_hf_nomic_bert.py CHANGED

```diff
@@ -4,7 +4,8 @@ from transformers import GPT2Config
 class NomicBertConfig(GPT2Config):
     model_type = "nomic_bert"
 
-    def __init__(self,
+    def __init__(
+        self,
         prenorm=False,
         parallel_block=False,
         parallel_block_tied_norm=False,
@@ -14,7 +15,7 @@ class NomicBertConfig(GPT2Config):
         use_flash_attn=False,
         use_xentropy=False,
         qkv_proj_bias=True,
-        rotary_emb_base=1000,
+        rotary_emb_base=10_000,
         rotary_emb_scale_base=None,
         rotary_emb_interleaved=False,
         mlp_fc1_bias=True,
@@ -25,6 +26,8 @@ class NomicBertConfig(GPT2Config):
         dense_seq_output=True,
         pad_vocab_size_multiple=1,
         tie_word_embeddings=True,
+        rotary_scaling_factor=1.0,
+        max_trained_positions=2048,
         **kwargs,
     ):
         self.prenorm = prenorm
@@ -47,5 +50,7 @@ class NomicBertConfig(GPT2Config):
         self.type_vocab_size = type_vocab_size
         self.dense_seq_output = dense_seq_output
         self.pad_vocab_size_multiple = pad_vocab_size_multiple
+        self.rotary_scaling_factor = rotary_scaling_factor
+        self.max_trained_positions = max_trained_positions
 
-        super().__init__(**kwargs)
+        super().__init__(**kwargs)
```
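In plain terms, the commit changes the default rotary embedding base from 1000 to 10_000 and exposes two new configuration fields, `rotary_emb_base` aside, namely `rotary_scaling_factor` and `max_trained_positions`, which are presumably consumed by the modeling code to scale rotary position embeddings past the trained context length. The sketch below is not code from this repository; it only illustrates, under the assumption of a dynamic NTK-style scheme, how a rotary-embedding module might read these three fields. The helper name `rotary_inv_freq` and the demo values are hypothetical.

```python
# Hypothetical sketch only -- not code from this commit. It shows one common
# way (dynamic NTK-style base rescaling) that fields like rotary_emb_base,
# rotary_scaling_factor and max_trained_positions can be consumed.
from types import SimpleNamespace

import torch


def rotary_inv_freq(config, seq_len: int, rotary_dim: int) -> torch.Tensor:
    """Inverse frequencies for rotary embeddings, with the base rescaled
    when the requested sequence length exceeds the trained context."""
    base = float(config.rotary_emb_base)  # 10_000 after this commit
    if seq_len > config.max_trained_positions:
        # Dynamic NTK-style adjustment (an assumption, not necessarily what
        # modeling_hf_nomic_bert.py actually does).
        scale = (
            config.rotary_scaling_factor * seq_len / config.max_trained_positions
            - (config.rotary_scaling_factor - 1)
        )
        base = base * scale ** (rotary_dim / (rotary_dim - 2))
    exponents = torch.arange(0, rotary_dim, 2, dtype=torch.float32) / rotary_dim
    return 1.0 / (base ** exponents)


# Placeholder config mirroring the defaults added in this commit
# (rotary_scaling_factor raised to 2.0 here just to exercise the branch).
cfg = SimpleNamespace(
    rotary_emb_base=10_000, rotary_scaling_factor=2.0, max_trained_positions=2048
)
print(rotary_inv_freq(cfg, seq_len=8192, rotary_dim=64).shape)  # torch.Size([32])
```

With the committed defaults (`rotary_scaling_factor=1.0`, `max_trained_positions=2048`), nothing changes for sequences within the trained length; how the scaling is actually applied is determined by the accompanying modeling file, which this commit does not touch.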